gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {
 311   rtx reg_rtx;
 312 };
 313
 314 /* Hash table of expressions.  */
 315
 316 struct expr
 317 {
 318   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 319   rtx expr;
 320   /* Index in the available expression bitmaps.  */
 321   int bitmap_index;
 322   /* Next entry with the same hash.  */
 323   struct expr *next_same_hash;
 324   /* List of anticipatable occurrences in basic blocks in the function.
 325      An "anticipatable occurrence" is one that is the first occurrence in the
 326      basic block, the operands are not modified in the basic block prior
 327      to the occurrence and the output is not used between the start of
 328      the block and the occurrence.  */
 329   struct occr *antic_occr;
 330   /* List of available occurrence in basic blocks in the function.
 331      An "available occurrence" is one that is the last occurrence in the
 332      basic block and the operands are not modified by following statements in
 333      the basic block [including this insn].  */
 334   struct occr *avail_occr;
 335   /* Non-null if the computation is PRE redundant.
 336      The value is the newly created pseudo-reg to record a copy of the
 337      expression in all the places that reach the redundant copy.  */
 338   rtx reaching_reg;
 339 };
 340
 341 /* Occurrence of an expression.
 342    There is one per basic block.  If a pattern appears more than once the
 343    last appearance is used [or first for anticipatable expressions].  */
 344
 345 struct occr
 346 {
 347   /* Next occurrence of this expression.  */
 348   struct occr *next;
 349   /* The insn that computes the expression.  */
 350   rtx insn;
 351   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 352   char deleted_p;
 353   /* Non-zero if this [available] occurrence has been copied to
 354      reaching_reg.  */
 355   /* ??? This is mutually exclusive with deleted_p, so they could share
 356      the same byte.  */
 357   char copied_p;
 358 };
 359
 360 /* Expression and copy propagation hash tables.
 361    Each hash table is an array of buckets.
 362    ??? It is known that if it were an array of entries, structure elements
 363    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 364    not clear whether in the final analysis a sufficient amount of memory would
 365    be saved as the size of the available expression bitmaps would be larger
 366    [one could build a mapping table without holes afterwards though].
 367    Someday I'll perform the computation and figure it out.
 368 */
 369
 370 /* Total size of the expression hash table, in elements.  */
 371 static int expr_hash_table_size;
 372 /* The table itself.
 373    This is an array of `expr_hash_table_size' elements.  */
 374 static struct expr **expr_hash_table;
 375
 376 /* Total size of the copy propagation hash table, in elements.  */
 377 static int set_hash_table_size;
 378 /* The table itself.
 379    This is an array of `set_hash_table_size' elements.  */
 380 static struct expr **set_hash_table;
 381
 382 /* Mapping of uids to cuids.
 383    Only real insns get cuids.  */
 384 static int *uid_cuid;
 385
 386 /* Highest UID in UID_CUID.  */
 387 static int max_uid;
 388
 389 /* Get the cuid of an insn.  */
 390 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 391
 392 /* Number of cuids.  */
 393 static int max_cuid;
 394
 395 /* Mapping of cuids to insns.  */
 396 static rtx *cuid_insn;
 397
 398 /* Get insn from cuid.  */
 399 #define CUID_INSN(CUID) (cuid_insn[CUID])
 400
 401 /* Maximum register number in function prior to doing gcse + 1.
 402    Registers created during this pass have regno >= max_gcse_regno.
 403    This is named with "gcse" to not collide with global of same name.  */
 404 static int max_gcse_regno;
 405
 406 /* Maximum number of cse-able expressions found.  */
 407 static int n_exprs;
 408 /* Maximum number of assignments for copy propagation found.  */
 409 static int n_sets;
 410
 411 /* Table of registers that are modified.
 412    For each register, each element is a list of places where the pseudo-reg
 413    is set.
 414
 415    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 416    requires knowledge of which blocks kill which regs [and thus could use
 417    a bitmap instead of the lists `reg_set_table' uses].
 418
 419    `reg_set_table' and could be turned into an array of bitmaps
 420    (num-bbs x num-regs)
 421    [however perhaps it may be useful to keep the data as is].
 422    One advantage of recording things this way is that `reg_set_table' is
 423    fairly sparse with respect to pseudo regs but for hard regs could be
 424    fairly dense [relatively speaking].
 425    And recording sets of pseudo-regs in lists speeds
 426    up functions like compute_transp since in the case of pseudo-regs we only
 427    need to iterate over the number of times a pseudo-reg is set, not over the
 428    number of basic blocks [clearly there is a bit of a slow down in the cases
 429    where a pseudo is set more than once in a block, however it is believed
 430    that the net effect is to speed things up].  This isn't done for hard-regs
 431    because recording call-clobbered hard-regs in `reg_set_table' at each
 432    function call can consume a fair bit of memory, and iterating over hard-regs
 433    stored this way in compute_transp will be more expensive.  */
 434
 435 typedef struct reg_set {
 436   /* The next setting of this register.  */
 437   struct reg_set *next;
 438   /* The insn where it was set.  */
 439   rtx insn;
 440 } reg_set;
 441 static reg_set **reg_set_table;
 442 /* Size of `reg_set_table'.
 443    The table starts out at max_gcse_regno + slop, and is enlarged as
 444    necessary.  */
 445 static int reg_set_table_size;
 446 /* Amount to grow `reg_set_table' by when it's full.  */
 447 #define REG_SET_TABLE_SLOP 100
 448
 449 /* Bitmap containing one bit for each register in the program.
 450    Used when performing GCSE to track which registers have been set since
 451    the start of the basic block.  */
 452 static sbitmap reg_set_bitmap;
 453
 454 /* For each block, a bitmap of registers set in the block.
 455    This is used by expr_killed_p and compute_transp.
 456    It is computed during hash table computation and not by compute_sets
 457    as it includes registers added since the last pass (or between cprop and
 458    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 459 static sbitmap *reg_set_in_block;
 460
 461 /* For each block, non-zero if memory is set in that block.
 462    This is computed during hash table computation and is used by
 463    expr_killed_p and compute_transp.
 464    ??? Handling of memory is very simple, we don't make any attempt
 465    to optimize things (later).
 466    ??? This can be computed by compute_sets since the information
 467    doesn't change.  */
 468 static char *mem_set_in_block;
 469
 470 /* Various variables for statistics gathering.  */
 471
 472 /* Memory used in a pass.
 473    This isn't intended to be absolutely precise.  Its intent is only
 474    to keep an eye on memory usage.  */
 475 static int bytes_used;
 476 /* GCSE substitutions made.  */
 477 static int gcse_subst_count;
 478 /* Number of copy instructions created.  */
 479 static int gcse_create_count;
 480 /* Number of constants propagated.  */
 481 static int const_prop_count;
 482 /* Number of copys propagated.  */
 483 static int copy_prop_count;
 484 \f
 485 /* These variables are used by classic GCSE.
 486    Normally they'd be defined a bit later, but `rd_gen' needs to
 487    be declared sooner.  */
 488
 489 /* A bitmap of all ones for implementing the algorithm for available
 490    expressions and reaching definitions.  */
 491 /* ??? Available expression bitmaps have a different size than reaching
 492    definition bitmaps.  This should be the larger of the two, however, it
 493    is not currently used for reaching definitions.  */
 494 static sbitmap u_bitmap;
 495
 496 /* Each block has a bitmap of each type.
 497    The length of each blocks bitmap is:
 498
 499        max_cuid  - for reaching definitions
 500        n_exprs - for available expressions
 501
 502    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 503    rd_kill[block_num][cuid_num]
 504    ae_kill[block_num][expr_num]
 505 */
 506
 507 /* For reaching defs */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* for available exprs */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines.  */
 515 struct null_pointer_info {
 516   /* The basic block being processed.  */
 517   int current_block;
 518   /* The first register to be handled in this pass.  */
 519   int min_reg;
 520   /* One greater than the last register to be handled in this pass.  */
 521   int max_reg;
 522   sbitmap *nonnull_local;
 523   sbitmap *nonnull_killed;
 524 };
 525 \f
 526 static void compute_can_copy      PARAMS ((void));
 527
 528 static char *gmalloc              PARAMS ((unsigned int));
 529 static char *grealloc            PARAMS ((char *, unsigned int));
 530 static char *gcse_alloc        PARAMS ((unsigned long));
 531 static void alloc_gcse_mem          PARAMS ((rtx));
 532 static void free_gcse_mem            PARAMS ((void));
 533 static void alloc_reg_set_mem    PARAMS ((int));
 534 static void free_reg_set_mem      PARAMS ((void));
 535 static int get_bitmap_width           PARAMS ((int, int, int));
 536 static void record_one_set          PARAMS ((int, rtx));
 537 static void record_set_info        PARAMS ((rtx, rtx, void *));
 538 static void compute_sets              PARAMS ((rtx));
 539
 540 static void hash_scan_insn          PARAMS ((rtx, int, int));
 541 static void hash_scan_set            PARAMS ((rtx, rtx, int));
 542 static void hash_scan_clobber    PARAMS ((rtx, rtx));
 543 static void hash_scan_call          PARAMS ((rtx, rtx));
 544 static int want_to_gcse_p            PARAMS ((rtx));
 545 static int oprs_unchanged_p        PARAMS ((rtx, rtx, int));
 546 static int oprs_anticipatable_p       PARAMS ((rtx, rtx));
 547 static int oprs_available_p        PARAMS ((rtx, rtx));
 548 static void insert_expr_in_table      PARAMS ((rtx, enum machine_mode,
 549                                               rtx, int, int));
 550 static void insert_set_in_table       PARAMS ((rtx, rtx));
 551 static unsigned int hash_expr    PARAMS ((rtx, enum machine_mode,
 552                                           int *, int));
 553 static unsigned int hash_expr_1       PARAMS ((rtx, enum machine_mode, int *));
 554 static unsigned int hash_set      PARAMS ((int, int));
 555 static int expr_equiv_p        PARAMS ((rtx, rtx));
 556 static void record_last_reg_set_info  PARAMS ((rtx, int));
 557 static void record_last_mem_set_info  PARAMS ((rtx));
 558 static void record_last_set_info      PARAMS ((rtx, rtx, void *));
 559 static void compute_hash_table  PARAMS ((int));
 560 static void alloc_set_hash_table      PARAMS ((int));
 561 static void free_set_hash_table       PARAMS ((void));
 562 static void compute_set_hash_table    PARAMS ((void));
 563 static void alloc_expr_hash_table     PARAMS ((int));
 564 static void free_expr_hash_table      PARAMS ((void));
 565 static void compute_expr_hash_table   PARAMS ((void));
 566 static void dump_hash_table        PARAMS ((FILE *, const char *,
 567                                             struct expr **, int, int));
 568 static struct expr *lookup_expr       PARAMS ((rtx));
 569 static struct expr *lookup_set  PARAMS ((int, rtx));
 570 static struct expr *next_set      PARAMS ((int, struct expr *));
 571 static void reset_opr_set_tables      PARAMS ((void));
 572 static int oprs_not_set_p            PARAMS ((rtx, rtx));
 573 static void mark_call            PARAMS ((rtx));
 574 static void mark_set              PARAMS ((rtx, rtx));
 575 static void mark_clobber              PARAMS ((rtx, rtx));
 576 static void mark_oprs_set            PARAMS ((rtx));
 577
 578 static void alloc_cprop_mem        PARAMS ((int, int));
 579 static void free_cprop_mem          PARAMS ((void));
 580 static void compute_transp          PARAMS ((rtx, int, sbitmap *, int));
 581 static void compute_transpout       PARAMS ((void));
 582 static void compute_local_properties  PARAMS ((sbitmap *, sbitmap *,
 583                                                sbitmap *, int));
 584 static void compute_cprop_data  PARAMS ((void));
 585 static void find_used_regs          PARAMS ((rtx));
 586 static int try_replace_reg          PARAMS ((rtx, rtx, rtx));
 587 static struct expr *find_avail_set    PARAMS ((int, rtx));
 588 static int cprop_jump                   PARAMS ((rtx, rtx, struct reg_use *, rtx));
 589 #ifdef HAVE_cc0
 590 static int cprop_cc0_jump               PARAMS ((rtx, struct reg_use *, rtx));
 591 #endif
 592 static int cprop_insn            PARAMS ((rtx, int));
 593 static int cprop                      PARAMS ((int));
 594 static int one_cprop_pass            PARAMS ((int, int));
 595
 596 static void alloc_pre_mem            PARAMS ((int, int));
 597 static void free_pre_mem              PARAMS ((void));
 598 static void compute_pre_data      PARAMS ((void));
 599 static int pre_expr_reaches_here_p    PARAMS ((int, struct expr *, int));
 600 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 601 static void pre_insert_copy_insn      PARAMS ((struct expr *, rtx));
 602 static void pre_insert_copies    PARAMS ((void));
 603 static int pre_delete            PARAMS ((void));
 604 static int pre_gcse                PARAMS ((void));
 605 static int one_pre_gcse_pass      PARAMS ((int));
 606
 607 static void add_label_notes           PARAMS ((rtx, rtx));
 608
 609 static void alloc_code_hoist_mem        PARAMS ((int, int));
 610 static void free_code_hoist_mem         PARAMS ((void));
 611 static void compute_code_hoist_vbeinout PARAMS ((void));
 612 static void compute_code_hoist_data     PARAMS ((void));
 613 static int hoist_expr_reaches_here_p    PARAMS ((int, int, int, char *));
 614 static void hoist_code                  PARAMS ((void));
 615 static int one_code_hoisting_pass       PARAMS ((void));
 616
 617 static void alloc_rd_mem              PARAMS ((int, int));
 618 static void free_rd_mem        PARAMS ((void));
 619 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 620 static void compute_kill_rd        PARAMS ((void));
 621 static void compute_rd          PARAMS ((void));
 622 static void alloc_avail_expr_mem      PARAMS ((int, int));
 623 static void free_avail_expr_mem       PARAMS ((void));
 624 static void compute_ae_gen          PARAMS ((void));
 625 static int expr_killed_p              PARAMS ((rtx, int));
 626 static void compute_ae_kill        PARAMS ((sbitmap *, sbitmap *));
 627 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 628                                          int, int));
 629 static rtx computing_insn            PARAMS ((struct expr *, rtx));
 630 static int def_reaches_here_p    PARAMS ((rtx, rtx));
 631 static int can_disregard_other_sets   PARAMS ((struct reg_set **, rtx, int));
 632 static int handle_avail_expr      PARAMS ((rtx, struct expr *));
 633 static int classic_gcse        PARAMS ((void));
 634 static int one_classic_gcse_pass      PARAMS ((int));
 635 static void invalidate_nonnull_info     PARAMS ((rtx, rtx, void *));
 636 static void delete_null_pointer_checks_1 PARAMS ((int *, sbitmap *, sbitmap *,
 637                                                   struct null_pointer_info *));
 638 static rtx process_insert_insn  PARAMS ((struct expr *));
 639 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 640 static int expr_reaches_here_p_work     PARAMS ((struct occr *, struct expr *,
 641                                                  int, int, char *));
 642 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 643                                                  int, char *));
 644 \f
 645 /* Entry point for global common subexpression elimination.
 646    F is the first instruction in the function.  */
 647
 648 int
 649 gcse_main (f, file)
 650      rtx f;
 651      FILE *file;
 652 {
 653   int changed, pass;
 654   /* Bytes used at start of pass.  */
 655   int initial_bytes_used;
 656   /* Maximum number of bytes used by a pass.  */
 657   int max_pass_bytes;
 658   /* Point to release obstack data from for each pass.  */
 659   char *gcse_obstack_bottom;
 660
 661   /* We do not construct an accurate cfg in functions which call
 662      setjmp, so just punt to be safe.  */
 663   if (current_function_calls_setjmp)
 664     return 0;
 665
 666   /* Assume that we do not need to run jump optimizations after gcse.  */
 667   run_jump_opt_after_gcse = 0;
 668
 669   /* For calling dump_foo fns from gdb.  */
 670   debug_stderr = stderr;
 671   gcse_file = file;
 672
 673   /* Identify the basic block information for this function, including
 674      successors and predecessors.  */
 675   max_gcse_regno = max_reg_num ();
 676   find_basic_blocks (f, max_gcse_regno, file);
 677   cleanup_cfg (f);
 678
 679   if (file)
 680     dump_flow_info (file);
 681
 682   /* Return if there's nothing to do.  */
 683   if (n_basic_blocks <= 1)
 684     {
 685       /* Free storage allocated by find_basic_blocks.  */
 686       free_basic_block_vars (0);
 687       return 0;
 688     }
 689
 690   /* Trying to perform global optimizations on flow graphs which have
 691      a high connectivity will take a long time and is unlikely to be
 692      particularly useful.
 693
 694      In normal circumstances a cfg should have about twice has many edges
 695      as blocks.  But we do not want to punish small functions which have
 696      a couple switch statements.  So we require a relatively large number
 697      of basic blocks and the ratio of edges to blocks to be high.  */
 698   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 699     {
 700       /* Free storage allocated by find_basic_blocks.  */
 701       free_basic_block_vars (0);
 702       return 0;
 703     }
 704
 705   /* See what modes support reg/reg copy operations.  */
 706   if (! can_copy_init_p)
 707     {
 708       compute_can_copy ();
 709       can_copy_init_p = 1;
 710     }
 711
 712   gcc_obstack_init (&gcse_obstack);
 713   bytes_used = 0;
 714
 715   /* Record where pseudo-registers are set.
 716      This data is kept accurate during each pass.
 717      ??? We could also record hard-reg information here
 718      [since it's unchanging], however it is currently done during
 719      hash table computation.
 720
 721      It may be tempting to compute MEM set information here too, but MEM
 722      sets will be subject to code motion one day and thus we need to compute
 723      information about memory sets when we build the hash tables.  */
 724
 725   alloc_reg_set_mem (max_gcse_regno);
 726   compute_sets (f);
 727
 728   pass = 0;
 729   initial_bytes_used = bytes_used;
 730   max_pass_bytes = 0;
 731   gcse_obstack_bottom = gcse_alloc (1);
 732   changed = 1;
 733   while (changed && pass < MAX_PASSES)
 734     {
 735       changed = 0;
 736       if (file)
 737         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 738
 739       /* Initialize bytes_used to the space for the pred/succ lists,
 740          and the reg_set_table data.  */
 741       bytes_used = initial_bytes_used;
 742
 743       /* Each pass may create new registers, so recalculate each time.  */
 744       max_gcse_regno = max_reg_num ();
 745
 746       alloc_gcse_mem (f);
 747
 748       /* Don't allow constant propagation to modify jumps
 749          during this pass.  */
 750       changed = one_cprop_pass (pass + 1, 0);
 751
 752       if (optimize_size)
 753         changed |= one_classic_gcse_pass (pass + 1);
 754       else
 755         {
 756           changed |= one_pre_gcse_pass (pass + 1);
 757           free_reg_set_mem ();
 758           alloc_reg_set_mem (max_reg_num ());
 759           compute_sets (f);
 760           run_jump_opt_after_gcse = 1;
 761         }
 762
 763       if (max_pass_bytes < bytes_used)
 764         max_pass_bytes = bytes_used;
 765
 766       /* Free up memory, then reallocate for code hoisting.  We can
 767          not re-use the existing allocated memory because the tables
 768          will not have info for the insns or registers created by
 769          partial redundancy elimination.  */
 770       free_gcse_mem ();
 771
 772       /* It does not make sense to run code hoisting unless we optimizing
 773          for code size -- it rarely makes programs faster, and can make
 774          them bigger if we did partial redundancy elimination (when optimizing
 775          for space, we use a classic gcse algorithm instead of partial
 776          redundancy algorithms).  */
 777       if (optimize_size)
 778         {
 779           max_gcse_regno = max_reg_num ();
 780           alloc_gcse_mem (f);
 781           changed |= one_code_hoisting_pass ();
 782           free_gcse_mem ();
 783
 784           if (max_pass_bytes < bytes_used)
 785             max_pass_bytes = bytes_used;
 786         }
 787
 788       if (file)
 789         {
 790           fprintf (file, "\n");
 791           fflush (file);
 792         }
 793       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 794       pass++;
 795     }
 796
 797   /* Do one last pass of copy propagation, including cprop into
 798      conditional jumps.  */
 799
 800   max_gcse_regno = max_reg_num ();
 801   alloc_gcse_mem (f);
 802   /* This time, go ahead and allow cprop to alter jumps.  */
 803   one_cprop_pass (pass + 1, 1);
 804   free_gcse_mem ();
 805
 806   if (file)
 807     {
 808       fprintf (file, "GCSE of %s: %d basic blocks, ",
 809                current_function_name, n_basic_blocks);
 810       fprintf (file, "%d pass%s, %d bytes\n\n",
 811                pass, pass > 1 ? "es" : "", max_pass_bytes);
 812     }
 813
 814   /* Free our obstack.  */
 815   obstack_free (&gcse_obstack, NULL_PTR);
 816   /* Free reg_set_table.  */
 817   free_reg_set_mem ();
 818   /* Free storage allocated by find_basic_blocks.  */
 819   free_basic_block_vars (0);
 820   return run_jump_opt_after_gcse;
 821 }
 822 \f
 823 /* Misc. utilities.  */
 824
 825 /* Compute which modes support reg/reg copy operations.  */
 826
 827 static void
 828 compute_can_copy ()
 829 {
 830   int i;
 831 #ifndef AVOID_CCMODE_COPIES
 832   rtx reg,insn;
 833 #endif
 834   char *free_point = (char *) oballoc (1);
 835
 836   bzero (can_copy_p, NUM_MACHINE_MODES);
 837
 838   start_sequence ();
 839   for (i = 0; i < NUM_MACHINE_MODES; i++)
 840     {
 841       switch (GET_MODE_CLASS (i))
 842         {
 843         case MODE_CC :
 844 #ifdef AVOID_CCMODE_COPIES
 845           can_copy_p[i] = 0;
 846 #else
 847           reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 848           insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 849           if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 850             can_copy_p[i] = 1;
 851 #endif
 852           break;
 853         default :
 854           can_copy_p[i] = 1;
 855           break;
 856         }
 857     }
 858   end_sequence ();
 859
 860   /* Free the objects we just allocated.  */
 861   obfree (free_point);
 862 }
 863 \f
 864 /* Cover function to xmalloc to record bytes allocated.  */
 865
 866 static char *
 867 gmalloc (size)
 868      unsigned int size;
 869 {
 870   bytes_used += size;
 871   return xmalloc (size);
 872 }
 873
 874 /* Cover function to xrealloc.
 875    We don't record the additional size since we don't know it.
 876    It won't affect memory usage stats much anyway.  */
 877
 878 static char *
 879 grealloc (ptr, size)
 880      char *ptr;
 881      unsigned int size;
 882 {
 883   return xrealloc (ptr, size);
 884 }
 885
 886 /* Cover function to obstack_alloc.
 887    We don't need to record the bytes allocated here since
 888    obstack_chunk_alloc is set to gmalloc.  */
 889
 890 static char *
 891 gcse_alloc (size)
 892      unsigned long size;
 893 {
 894   return (char *) obstack_alloc (&gcse_obstack, size);
 895 }
 896
 897 /* Allocate memory for the cuid mapping array,
 898    and reg/memory set tracking tables.
 899
 900    This is called at the start of each pass.  */
 901
 902 static void
 903 alloc_gcse_mem (f)
 904      rtx f;
 905 {
 906   int i,n;
 907   rtx insn;
 908
 909   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 910      CUIDs are like UIDs except they increase monotonically, have no gaps,
 911      and only apply to real insns.  */
 912
 913   max_uid = get_max_uid ();
 914   n = (max_uid + 1) * sizeof (int);
 915   uid_cuid = (int *) gmalloc (n);
 916   bzero ((char *) uid_cuid, n);
 917   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 918     {
 919       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 920         INSN_CUID (insn) = i++;
 921       else
 922         INSN_CUID (insn) = i;
 923     }
 924
 925   /* Create a table mapping cuids to insns.  */
 926
 927   max_cuid = i;
 928   n = (max_cuid + 1) * sizeof (rtx);
 929   cuid_insn = (rtx *) gmalloc (n);
 930   bzero ((char *) cuid_insn, n);
 931   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 932     {
 933       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 934         {
 935           CUID_INSN (i) = insn;
 936           i++;
 937         }
 938     }
 939
 940   /* Allocate vars to track sets of regs.  */
 941
 942   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 943
 944   /* Allocate vars to track sets of regs, memory per block.  */
 945
 946   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 947                                                        max_gcse_regno);
 948   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 949 }
 950
 951 /* Free memory allocated by alloc_gcse_mem.  */
 952
 953 static void
 954 free_gcse_mem ()
 955 {
 956   free (uid_cuid);
 957   free (cuid_insn);
 958
 959   free (reg_set_bitmap);
 960
 961   free (reg_set_in_block);
 962   free (mem_set_in_block);
 963 }
 964
 965 /* Many of the global optimization algorithms work by solving dataflow
 966    equations for various expressions.  Initially, some local value is
 967    computed for each expression in each block.  Then, the values
 968    across the various blocks are combined (by following flow graph
 969    edges) to arrive at global values.  Conceptually, each set of
 970    equations is independent.  We may therefore solve all the equations
 971    in parallel, solve them one at a time, or pick any intermediate
 972    approach.
 973
 974    When you're going to need N two-dimensional bitmaps, each X (say,
 975    the number of blocks) by Y (say, the number of expressions), call
 976    this function.  It's not important what X and Y represent; only
 977    that Y correspond to the things that can be done in parallel.  This
 978    function will return an appropriate chunking factor C; you should
 979    solve C sets of equations in parallel.  By going through this
 980    function, we can easily trade space against time; by solving fewer
 981    equations in parallel we use less space.  */
 982
 983 static int
 984 get_bitmap_width (n, x, y)
 985      int n;
 986      int x;
 987      int y;
 988 {
 989   /* It's not really worth figuring out *exactly* how much memory will
 990      be used by a particular choice.  The important thing is to get
 991      something approximately right.  */
 992   size_t max_bitmap_memory = 10 * 1024 * 1024;
 993
 994   /* The number of bytes we'd use for a single column of minimum
 995      width.  */
 996   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 997
 998   /* Often, it's reasonable just to solve all the equations in
 999      parallel.  */
1000   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
1001     return y;
1002
1003   /* Otherwise, pick the largest width we can, without going over the
1004      limit.  */
1005   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
1006                              / column_size);
1007 }
1008
1009 \f
1010 /* Compute the local properties of each recorded expression.
1011    Local properties are those that are defined by the block, irrespective
1012    of other blocks.
1013
1014    An expression is transparent in a block if its operands are not modified
1015    in the block.
1016
1017    An expression is computed (locally available) in a block if it is computed
1018    at least once and expression would contain the same value if the
1019    computation was moved to the end of the block.
1020
1021    An expression is locally anticipatable in a block if it is computed at
1022    least once and expression would contain the same value if the computation
1023    was moved to the beginning of the block.
1024
1025    We call this routine for cprop, pre and code hoisting.  They all
1026    compute basically the same information and thus can easily share
1027    this code.
1028
1029    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
1030    local properties.  If NULL, then it is not necessary to compute
1031    or record that particular property.
1032
1033    SETP controls which hash table to look at.  If zero, this routine
1034    looks at the expr hash table; if nonzero this routine looks at
1035    the set hash table.  Additionally, TRANSP is computed as ~TRANSP,
1036    since this is really cprop's ABSALTERED.  */
1037
1038 static void
1039 compute_local_properties (transp, comp, antloc, setp)
1040      sbitmap *transp;
1041      sbitmap *comp;
1042      sbitmap *antloc;
1043      int setp;
1044 {
1045   int i, hash_table_size;
1046   struct expr **hash_table;
1047
1048   /* Initialize any bitmaps that were passed in.  */
1049   if (transp)
1050     {
1051       if (setp)
1052         sbitmap_vector_zero (transp, n_basic_blocks);
1053       else
1054         sbitmap_vector_ones (transp, n_basic_blocks);
1055     }
1056   if (comp)
1057     sbitmap_vector_zero (comp, n_basic_blocks);
1058   if (antloc)
1059     sbitmap_vector_zero (antloc, n_basic_blocks);
1060
1061   /* We use the same code for cprop, pre and hoisting.  For cprop
1062      we care about the set hash table, for pre and hoisting we
1063      care about the expr hash table.  */
1064   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1065   hash_table = setp ? set_hash_table : expr_hash_table;
1066
1067   for (i = 0; i < hash_table_size; i++)
1068     {
1069       struct expr *expr;
1070
1071       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1072         {
1073           struct occr *occr;
1074           int indx = expr->bitmap_index;
1075
1076           /* The expression is transparent in this block if it is not killed.
1077              We start by assuming all are transparent [none are killed], and
1078              then reset the bits for those that are.  */
1079
1080           if (transp)
1081             compute_transp (expr->expr, indx, transp, setp);
1082
1083           /* The occurrences recorded in antic_occr are exactly those that
1084              we want to set to non-zero in ANTLOC.  */
1085
1086           if (antloc)
1087             {
1088               for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1089                 {
1090                   int bb = BLOCK_NUM (occr->insn);
1091                   SET_BIT (antloc[bb], indx);
1092
1093                   /* While we're scanning the table, this is a good place to
1094                      initialize this.  */
1095                   occr->deleted_p = 0;
1096                 }
1097             }
1098
1099           /* The occurrences recorded in avail_occr are exactly those that
1100              we want to set to non-zero in COMP.  */
1101           if (comp)
1102             {
1103
1104               for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1105                 {
1106                   int bb = BLOCK_NUM (occr->insn);
1107                   SET_BIT (comp[bb], indx);
1108
1109                   /* While we're scanning the table, this is a good place to
1110                      initialize this.  */
1111                   occr->copied_p = 0;
1112                 }
1113             }
1114
1115           /* While we're scanning the table, this is a good place to
1116              initialize this.  */
1117           expr->reaching_reg = 0;
1118         }
1119     }
1120 }
1121
1122 \f
1123 /* Register set information.
1124
1125    `reg_set_table' records where each register is set or otherwise
1126    modified.  */
1127
1128 static struct obstack reg_set_obstack;
1129
1130 static void
1131 alloc_reg_set_mem (n_regs)
1132      int n_regs;
1133 {
1134   int n;
1135
1136   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1137   n = reg_set_table_size * sizeof (struct reg_set *);
1138   reg_set_table = (struct reg_set **) gmalloc (n);
1139   bzero ((char *) reg_set_table, n);
1140
1141   gcc_obstack_init (&reg_set_obstack);
1142 }
1143
1144 static void
1145 free_reg_set_mem ()
1146 {
1147   free (reg_set_table);
1148   obstack_free (&reg_set_obstack, NULL_PTR);
1149 }
1150
1151 /* Record REGNO in the reg_set table.  */
1152
1153 static void
1154 record_one_set (regno, insn)
1155      int regno;
1156      rtx insn;
1157 {
1158   /* allocate a new reg_set element and link it onto the list */
1159   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1160
1161   /* If the table isn't big enough, enlarge it.  */
1162   if (regno >= reg_set_table_size)
1163     {
1164       int new_size = regno + REG_SET_TABLE_SLOP;
1165       reg_set_table = (struct reg_set **)
1166         grealloc ((char *) reg_set_table,
1167                   new_size * sizeof (struct reg_set *));
1168       bzero ((char *) (reg_set_table + reg_set_table_size),
1169              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1170       reg_set_table_size = new_size;
1171     }
1172
1173   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1174                                                    sizeof (struct reg_set));
1175   bytes_used += sizeof (struct reg_set);
1176   new_reg_info->insn = insn;
1177   new_reg_info->next = NULL;
1178   if (reg_set_table[regno] == NULL)
1179     reg_set_table[regno] = new_reg_info;
1180   else
1181     {
1182       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1183       /* ??? One could keep a "last" pointer to speed this up.  */
1184       while (reg_info_ptr1 != NULL)
1185         {
1186           reg_info_ptr2 = reg_info_ptr1;
1187           reg_info_ptr1 = reg_info_ptr1->next;
1188         }
1189       reg_info_ptr2->next = new_reg_info;
1190     }
1191 }
1192
1193 /* Called from compute_sets via note_stores to handle one
1194    SET or CLOBBER in an insn.  The DATA is really the instruction
1195    in which the SET is occurring.  */
1196
1197 static void
1198 record_set_info (dest, setter, data)
1199      rtx dest, setter ATTRIBUTE_UNUSED;
1200      void *data;
1201 {
1202   rtx record_set_insn = (rtx) data;
1203
1204   if (GET_CODE (dest) == SUBREG)
1205     dest = SUBREG_REG (dest);
1206
1207   if (GET_CODE (dest) == REG)
1208     {
1209       if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1210         record_one_set (REGNO (dest), record_set_insn);
1211     }
1212 }
1213
1214 /* Scan the function and record each set of each pseudo-register.
1215
1216    This is called once, at the start of the gcse pass.
1217    See the comments for `reg_set_table' for further docs.  */
1218
1219 static void
1220 compute_sets (f)
1221      rtx f;
1222 {
1223   rtx insn = f;
1224
1225   while (insn)
1226     {
1227       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1228         note_stores (PATTERN (insn), record_set_info, insn);
1229       insn = NEXT_INSN (insn);
1230     }
1231 }
1232 \f
1233 /* Hash table support.  */
1234
1235 #define NEVER_SET -1
1236
1237 /* For each register, the cuid of the first/last insn in the block to set it,
1238    or -1 if not set.  */
1239 static int *reg_first_set;
1240 static int *reg_last_set;
1241
1242 /* While computing "first/last set" info, this is the CUID of first/last insn
1243    to set memory or -1 if not set.  `mem_last_set' is also used when
1244    performing GCSE to record whether memory has been set since the beginning
1245    of the block.
1246    Note that handling of memory is very simple, we don't make any attempt
1247    to optimize things (later).  */
1248 static int mem_first_set;
1249 static int mem_last_set;
1250
1251 /* Perform a quick check whether X, the source of a set, is something
1252    we want to consider for GCSE.  */
1253
1254 static int
1255 want_to_gcse_p (x)
1256      rtx x;
1257 {
1258   enum rtx_code code = GET_CODE (x);
1259
1260   switch (code)
1261     {
1262     case REG:
1263     case SUBREG:
1264     case CONST_INT:
1265     case CONST_DOUBLE:
1266     case CALL:
1267       return 0;
1268
1269     default:
1270       break;
1271     }
1272
1273   return 1;
1274 }
1275
1276 /* Return non-zero if the operands of expression X are unchanged from the
1277    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1278    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1279
1280 static int
1281 oprs_unchanged_p (x, insn, avail_p)
1282      rtx x, insn;
1283      int avail_p;
1284 {
1285   int i;
1286   enum rtx_code code;
1287   const char *fmt;
1288
1289   /* repeat is used to turn tail-recursion into iteration.  */
1290  repeat:
1291
1292   if (x == 0)
1293     return 1;
1294
1295   code = GET_CODE (x);
1296   switch (code)
1297     {
1298     case REG:
1299       if (avail_p)
1300         return (reg_last_set[REGNO (x)] == NEVER_SET
1301                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1302       else
1303         return (reg_first_set[REGNO (x)] == NEVER_SET
1304                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1305
1306     case MEM:
1307       if (avail_p)
1308         {
1309           if (mem_last_set != NEVER_SET
1310               && mem_last_set >= INSN_CUID (insn))
1311             return 0;
1312         }
1313       else
1314         {
1315           if (mem_first_set != NEVER_SET
1316               && mem_first_set < INSN_CUID (insn))
1317             return 0;
1318         }
1319       x = XEXP (x, 0);
1320       goto repeat;
1321
1322     case PRE_DEC:
1323     case PRE_INC:
1324     case POST_DEC:
1325     case POST_INC:
1326       return 0;
1327
1328     case PC:
1329     case CC0: /*FIXME*/
1330     case CONST:
1331     case CONST_INT:
1332     case CONST_DOUBLE:
1333     case SYMBOL_REF:
1334     case LABEL_REF:
1335     case ADDR_VEC:
1336     case ADDR_DIFF_VEC:
1337       return 1;
1338
1339     default:
1340       break;
1341     }
1342
1343   i = GET_RTX_LENGTH (code) - 1;
1344   fmt = GET_RTX_FORMAT (code);
1345   for (; i >= 0; i--)
1346     {
1347       if (fmt[i] == 'e')
1348         {
1349           rtx tem = XEXP (x, i);
1350
1351           /* If we are about to do the last recursive call
1352              needed at this level, change it into iteration.
1353              This function is called enough to be worth it.  */
1354           if (i == 0)
1355             {
1356               x = tem;
1357               goto repeat;
1358             }
1359           if (! oprs_unchanged_p (tem, insn, avail_p))
1360             return 0;
1361         }
1362       else if (fmt[i] == 'E')
1363         {
1364           int j;
1365           for (j = 0; j < XVECLEN (x, i); j++)
1366             {
1367               if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1368                 return 0;
1369             }
1370         }
1371     }
1372
1373   return 1;
1374 }
1375
1376 /* Return non-zero if the operands of expression X are unchanged from
1377    the start of INSN's basic block up to but not including INSN.  */
1378
1379 static int
1380 oprs_anticipatable_p (x, insn)
1381      rtx x, insn;
1382 {
1383   return oprs_unchanged_p (x, insn, 0);
1384 }
1385
1386 /* Return non-zero if the operands of expression X are unchanged from
1387    INSN to the end of INSN's basic block.  */
1388
1389 static int
1390 oprs_available_p (x, insn)
1391      rtx x, insn;
1392 {
1393   return oprs_unchanged_p (x, insn, 1);
1394 }
1395
1396 /* Hash expression X.
1397    MODE is only used if X is a CONST_INT.
1398    A boolean indicating if a volatile operand is found or if the expression
1399    contains something we don't want to insert in the table is stored in
1400    DO_NOT_RECORD_P.
1401
1402    ??? One might want to merge this with canon_hash.  Later.  */
1403
1404 static unsigned int
1405 hash_expr (x, mode, do_not_record_p, hash_table_size)
1406      rtx x;
1407      enum machine_mode mode;
1408      int *do_not_record_p;
1409      int hash_table_size;
1410 {
1411   unsigned int hash;
1412
1413   *do_not_record_p = 0;
1414
1415   hash = hash_expr_1 (x, mode, do_not_record_p);
1416   return hash % hash_table_size;
1417 }
1418
1419 /* Subroutine of hash_expr to do the actual work.  */
1420
1421 static unsigned int
1422 hash_expr_1 (x, mode, do_not_record_p)
1423      rtx x;
1424      enum machine_mode mode;
1425      int *do_not_record_p;
1426 {
1427   int i, j;
1428   unsigned hash = 0;
1429   enum rtx_code code;
1430   const char *fmt;
1431
1432   /* repeat is used to turn tail-recursion into iteration.  */
1433  repeat:
1434
1435   if (x == 0)
1436     return hash;
1437
1438   code = GET_CODE (x);
1439   switch (code)
1440     {
1441     case REG:
1442       {
1443         register int regno = REGNO (x);
1444         hash += ((unsigned) REG << 7) + regno;
1445         return hash;
1446       }
1447
1448     case CONST_INT:
1449       {
1450         unsigned HOST_WIDE_INT tem = INTVAL (x);
1451         hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1452         return hash;
1453       }
1454
1455     case CONST_DOUBLE:
1456       /* This is like the general case, except that it only counts
1457          the integers representing the constant.  */
1458       hash += (unsigned) code + (unsigned) GET_MODE (x);
1459       if (GET_MODE (x) != VOIDmode)
1460         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1461           {
1462             unsigned tem = XWINT (x, i);
1463             hash += tem;
1464           }
1465       else
1466         hash += ((unsigned) CONST_DOUBLE_LOW (x)
1467                  + (unsigned) CONST_DOUBLE_HIGH (x));
1468       return hash;
1469
1470       /* Assume there is only one rtx object for any given label.  */
1471     case LABEL_REF:
1472       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1473          differences and differences between each stage's debugging dumps.  */
1474       hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1475       return hash;
1476
1477     case SYMBOL_REF:
1478       {
1479         /* Don't hash on the symbol's address to avoid bootstrap differences.
1480            Different hash values may cause expressions to be recorded in
1481            different orders and thus different registers to be used in the
1482            final assembler.  This also avoids differences in the dump files
1483            between various stages.  */
1484         unsigned int h = 0;
1485         unsigned char *p = (unsigned char *) XSTR (x, 0);
1486         while (*p)
1487           h += (h << 7) + *p++; /* ??? revisit */
1488         hash += ((unsigned) SYMBOL_REF << 7) + h;
1489         return hash;
1490       }
1491
1492     case MEM:
1493       if (MEM_VOLATILE_P (x))
1494         {
1495           *do_not_record_p = 1;
1496           return 0;
1497         }
1498       hash += (unsigned) MEM;
1499       hash += MEM_ALIAS_SET (x);
1500       x = XEXP (x, 0);
1501       goto repeat;
1502
1503     case PRE_DEC:
1504     case PRE_INC:
1505     case POST_DEC:
1506     case POST_INC:
1507     case PC:
1508     case CC0:
1509     case CALL:
1510     case UNSPEC_VOLATILE:
1511       *do_not_record_p = 1;
1512       return 0;
1513
1514     case ASM_OPERANDS:
1515       if (MEM_VOLATILE_P (x))
1516         {
1517           *do_not_record_p = 1;
1518           return 0;
1519         }
1520
1521     default:
1522       break;
1523     }
1524
1525   i = GET_RTX_LENGTH (code) - 1;
1526   hash += (unsigned) code + (unsigned) GET_MODE (x);
1527   fmt = GET_RTX_FORMAT (code);
1528   for (; i >= 0; i--)
1529     {
1530       if (fmt[i] == 'e')
1531         {
1532           rtx tem = XEXP (x, i);
1533
1534           /* If we are about to do the last recursive call
1535              needed at this level, change it into iteration.
1536              This function is called enough to be worth it.  */
1537           if (i == 0)
1538             {
1539               x = tem;
1540               goto repeat;
1541             }
1542           hash += hash_expr_1 (tem, 0, do_not_record_p);
1543           if (*do_not_record_p)
1544             return 0;
1545         }
1546       else if (fmt[i] == 'E')
1547         for (j = 0; j < XVECLEN (x, i); j++)
1548           {
1549             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1550             if (*do_not_record_p)
1551               return 0;
1552           }
1553       else if (fmt[i] == 's')
1554         {
1555           register unsigned char *p = (unsigned char *) XSTR (x, i);
1556           if (p)
1557             while (*p)
1558               hash += *p++;
1559         }
1560       else if (fmt[i] == 'i')
1561         {
1562           register unsigned tem = XINT (x, i);
1563           hash += tem;
1564         }
1565       else
1566         abort ();
1567     }
1568
1569   return hash;
1570 }
1571
1572 /* Hash a set of register REGNO.
1573
1574    Sets are hashed on the register that is set.
1575    This simplifies the PRE copy propagation code.
1576
1577    ??? May need to make things more elaborate.  Later, as necessary.  */
1578
1579 static unsigned int
1580 hash_set (regno, hash_table_size)
1581      int regno;
1582      int hash_table_size;
1583 {
1584   unsigned int hash;
1585
1586   hash = regno;
1587   return hash % hash_table_size;
1588 }
1589
1590 /* Return non-zero if exp1 is equivalent to exp2.
1591    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1592
1593 static int
1594 expr_equiv_p (x, y)
1595      rtx x, y;
1596 {
1597   register int i, j;
1598   register enum rtx_code code;
1599   register const char *fmt;
1600
1601   if (x == y)
1602     return 1;
1603   if (x == 0 || y == 0)
1604     return x == y;
1605
1606   code = GET_CODE (x);
1607   if (code != GET_CODE (y))
1608     return 0;
1609
1610   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1611   if (GET_MODE (x) != GET_MODE (y))
1612     return 0;
1613
1614   switch (code)
1615     {
1616     case PC:
1617     case CC0:
1618       return x == y;
1619
1620     case CONST_INT:
1621       return INTVAL (x) == INTVAL (y);
1622
1623     case LABEL_REF:
1624       return XEXP (x, 0) == XEXP (y, 0);
1625
1626     case SYMBOL_REF:
1627       return XSTR (x, 0) == XSTR (y, 0);
1628
1629     case REG:
1630       return REGNO (x) == REGNO (y);
1631
1632     case MEM:
1633       /* Can't merge two expressions in different alias sets, since we can
1634          decide that the expression is transparent in a block when it isn't,
1635          due to it being set with the different alias set.  */
1636       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1637         return 0;
1638       break;
1639
1640     /*  For commutative operations, check both orders.  */
1641     case PLUS:
1642     case MULT:
1643     case AND:
1644     case IOR:
1645     case XOR:
1646     case NE:
1647     case EQ:
1648       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1649                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1650               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1651                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1652
1653     default:
1654       break;
1655     }
1656
1657   /* Compare the elements.  If any pair of corresponding elements
1658      fail to match, return 0 for the whole thing.  */
1659
1660   fmt = GET_RTX_FORMAT (code);
1661   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1662     {
1663       switch (fmt[i])
1664         {
1665         case 'e':
1666           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1667             return 0;
1668           break;
1669
1670         case 'E':
1671           if (XVECLEN (x, i) != XVECLEN (y, i))
1672             return 0;
1673           for (j = 0; j < XVECLEN (x, i); j++)
1674             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1675               return 0;
1676           break;
1677
1678         case 's':
1679           if (strcmp (XSTR (x, i), XSTR (y, i)))
1680             return 0;
1681           break;
1682
1683         case 'i':
1684           if (XINT (x, i) != XINT (y, i))
1685             return 0;
1686           break;
1687
1688         case 'w':
1689           if (XWINT (x, i) != XWINT (y, i))
1690             return 0;
1691         break;
1692
1693         case '0':
1694           break;
1695
1696         default:
1697           abort ();
1698         }
1699       }
1700
1701   return 1;
1702 }
1703
1704 /* Insert expression X in INSN in the hash table.
1705    If it is already present, record it as the last occurrence in INSN's
1706    basic block.
1707
1708    MODE is the mode of the value X is being stored into.
1709    It is only used if X is a CONST_INT.
1710
1711    ANTIC_P is non-zero if X is an anticipatable expression.
1712    AVAIL_P is non-zero if X is an available expression.  */
1713
1714 static void
1715 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1716      rtx x;
1717      enum machine_mode mode;
1718      rtx insn;
1719      int antic_p, avail_p;
1720 {
1721   int found, do_not_record_p;
1722   unsigned int hash;
1723   struct expr *cur_expr, *last_expr = NULL;
1724   struct occr *antic_occr, *avail_occr;
1725   struct occr *last_occr = NULL;
1726
1727   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1728
1729   /* Do not insert expression in table if it contains volatile operands,
1730      or if hash_expr determines the expression is something we don't want
1731      to or can't handle.  */
1732   if (do_not_record_p)
1733     return;
1734
1735   cur_expr = expr_hash_table[hash];
1736   found = 0;
1737
1738   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1739     {
1740       /* If the expression isn't found, save a pointer to the end of
1741          the list.  */
1742       last_expr = cur_expr;
1743       cur_expr = cur_expr->next_same_hash;
1744     }
1745
1746   if (! found)
1747     {
1748       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1749       bytes_used += sizeof (struct expr);
1750       if (expr_hash_table[hash] == NULL)
1751         {
1752           /* This is the first pattern that hashed to this index.  */
1753           expr_hash_table[hash] = cur_expr;
1754         }
1755       else
1756         {
1757           /* Add EXPR to end of this hash chain.  */
1758           last_expr->next_same_hash = cur_expr;
1759         }
1760       /* Set the fields of the expr element.  */
1761       cur_expr->expr = x;
1762       cur_expr->bitmap_index = n_exprs++;
1763       cur_expr->next_same_hash = NULL;
1764       cur_expr->antic_occr = NULL;
1765       cur_expr->avail_occr = NULL;
1766     }
1767
1768   /* Now record the occurrence(s).  */
1769
1770   if (antic_p)
1771     {
1772       antic_occr = cur_expr->antic_occr;
1773
1774       /* Search for another occurrence in the same basic block.  */
1775       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1776         {
1777           /* If an occurrence isn't found, save a pointer to the end of
1778              the list.  */
1779           last_occr = antic_occr;
1780           antic_occr = antic_occr->next;
1781         }
1782
1783       if (antic_occr)
1784         {
1785           /* Found another instance of the expression in the same basic block.
1786              Prefer the currently recorded one.  We want the first one in the
1787              block and the block is scanned from start to end.  */
1788           ; /* nothing to do */
1789         }
1790       else
1791         {
1792           /* First occurrence of this expression in this basic block.  */
1793           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1794           bytes_used += sizeof (struct occr);
1795           /* First occurrence of this expression in any block?  */
1796           if (cur_expr->antic_occr == NULL)
1797             cur_expr->antic_occr = antic_occr;
1798           else
1799             last_occr->next = antic_occr;
1800           antic_occr->insn = insn;
1801           antic_occr->next = NULL;
1802         }
1803     }
1804
1805   if (avail_p)
1806     {
1807       avail_occr = cur_expr->avail_occr;
1808
1809       /* Search for another occurrence in the same basic block.  */
1810       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1811         {
1812           /* If an occurrence isn't found, save a pointer to the end of
1813              the list.  */
1814           last_occr = avail_occr;
1815           avail_occr = avail_occr->next;
1816         }
1817
1818       if (avail_occr)
1819         {
1820           /* Found another instance of the expression in the same basic block.
1821              Prefer this occurrence to the currently recorded one.  We want
1822              the last one in the block and the block is scanned from start
1823              to end.  */
1824           avail_occr->insn = insn;
1825         }
1826       else
1827         {
1828           /* First occurrence of this expression in this basic block.  */
1829           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1830           bytes_used += sizeof (struct occr);
1831           /* First occurrence of this expression in any block?  */
1832           if (cur_expr->avail_occr == NULL)
1833             cur_expr->avail_occr = avail_occr;
1834           else
1835             last_occr->next = avail_occr;
1836           avail_occr->insn = insn;
1837           avail_occr->next = NULL;
1838         }
1839     }
1840 }
1841
1842 /* Insert pattern X in INSN in the hash table.
1843    X is a SET of a reg to either another reg or a constant.
1844    If it is already present, record it as the last occurrence in INSN's
1845    basic block.  */
1846
1847 static void
1848 insert_set_in_table (x, insn)
1849      rtx x;
1850      rtx insn;
1851 {
1852   int found;
1853   unsigned int hash;
1854   struct expr *cur_expr, *last_expr = NULL;
1855   struct occr *cur_occr, *last_occr = NULL;
1856
1857   if (GET_CODE (x) != SET
1858       || GET_CODE (SET_DEST (x)) != REG)
1859     abort ();
1860
1861   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1862
1863   cur_expr = set_hash_table[hash];
1864   found = 0;
1865
1866   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1867     {
1868       /* If the expression isn't found, save a pointer to the end of
1869          the list.  */
1870       last_expr = cur_expr;
1871       cur_expr = cur_expr->next_same_hash;
1872     }
1873
1874   if (! found)
1875     {
1876       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1877       bytes_used += sizeof (struct expr);
1878       if (set_hash_table[hash] == NULL)
1879         {
1880           /* This is the first pattern that hashed to this index.  */
1881           set_hash_table[hash] = cur_expr;
1882         }
1883       else
1884         {
1885           /* Add EXPR to end of this hash chain.  */
1886           last_expr->next_same_hash = cur_expr;
1887         }
1888       /* Set the fields of the expr element.
1889          We must copy X because it can be modified when copy propagation is
1890          performed on its operands.  */
1891       /* ??? Should this go in a different obstack?  */
1892       cur_expr->expr = copy_rtx (x);
1893       cur_expr->bitmap_index = n_sets++;
1894       cur_expr->next_same_hash = NULL;
1895       cur_expr->antic_occr = NULL;
1896       cur_expr->avail_occr = NULL;
1897     }
1898
1899   /* Now record the occurrence.  */
1900
1901   cur_occr = cur_expr->avail_occr;
1902
1903   /* Search for another occurrence in the same basic block.  */
1904   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1905     {
1906       /* If an occurrence isn't found, save a pointer to the end of
1907          the list.  */
1908       last_occr = cur_occr;
1909       cur_occr = cur_occr->next;
1910     }
1911
1912   if (cur_occr)
1913     {
1914       /* Found another instance of the expression in the same basic block.
1915          Prefer this occurrence to the currently recorded one.  We want
1916          the last one in the block and the block is scanned from start
1917          to end.  */
1918       cur_occr->insn = insn;
1919     }
1920   else
1921     {
1922       /* First occurrence of this expression in this basic block.  */
1923       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1924       bytes_used += sizeof (struct occr);
1925       /* First occurrence of this expression in any block?  */
1926       if (cur_expr->avail_occr == NULL)
1927         cur_expr->avail_occr = cur_occr;
1928       else
1929         last_occr->next = cur_occr;
1930       cur_occr->insn = insn;
1931       cur_occr->next = NULL;
1932     }
1933 }
1934
1935 /* Scan pattern PAT of INSN and add an entry to the hash table.
1936    If SET_P is non-zero, this is for the assignment hash table,
1937    otherwise it is for the expression hash table.  */
1938
1939 static void
1940 hash_scan_set (pat, insn, set_p)
1941      rtx pat, insn;
1942      int set_p;
1943 {
1944   rtx src = SET_SRC (pat);
1945   rtx dest = SET_DEST (pat);
1946
1947   if (GET_CODE (src) == CALL)
1948     hash_scan_call (src, insn);
1949
1950   if (GET_CODE (dest) == REG)
1951     {
1952       int regno = REGNO (dest);
1953       rtx tmp;
1954
1955       /* Only record sets of pseudo-regs in the hash table.  */
1956       if (! set_p
1957           && regno >= FIRST_PSEUDO_REGISTER
1958           /* Don't GCSE something if we can't do a reg/reg copy.  */
1959           && can_copy_p [GET_MODE (dest)]
1960           /* Is SET_SRC something we want to gcse?  */
1961           && want_to_gcse_p (src))
1962         {
1963           /* An expression is not anticipatable if its operands are
1964              modified before this insn.  */
1965           int antic_p = oprs_anticipatable_p (src, insn);
1966           /* An expression is not available if its operands are
1967              subsequently modified, including this insn.  */
1968           int avail_p = oprs_available_p (src, insn);
1969           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1970         }
1971       /* Record sets for constant/copy propagation.  */
1972       else if (set_p
1973                && regno >= FIRST_PSEUDO_REGISTER
1974                && ((GET_CODE (src) == REG
1975                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1976                     && can_copy_p [GET_MODE (dest)])
1977                    || GET_CODE (src) == CONST_INT
1978                    || GET_CODE (src) == SYMBOL_REF
1979                    || GET_CODE (src) == CONST_DOUBLE)
1980                /* A copy is not available if its src or dest is subsequently
1981                   modified.  Here we want to search from INSN+1 on, but
1982                   oprs_available_p searches from INSN on.  */
1983                && (insn == BLOCK_END (BLOCK_NUM (insn))
1984                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1985                        && oprs_available_p (pat, tmp))))
1986         insert_set_in_table (pat, insn);
1987     }
1988 }
1989
1990 static void
1991 hash_scan_clobber (x, insn)
1992      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1993 {
1994   /* Currently nothing to do.  */
1995 }
1996
1997 static void
1998 hash_scan_call (x, insn)
1999      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2000 {
2001   /* Currently nothing to do.  */
2002 }
2003
2004 /* Process INSN and add hash table entries as appropriate.
2005
2006    Only available expressions that set a single pseudo-reg are recorded.
2007
2008    Single sets in a PARALLEL could be handled, but it's an extra complication
2009    that isn't dealt with right now.  The trick is handling the CLOBBERs that
2010    are also in the PARALLEL.  Later.
2011
2012    If SET_P is non-zero, this is for the assignment hash table,
2013    otherwise it is for the expression hash table.
2014    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2015    not record any expressions.  */
2016
2017 static void
2018 hash_scan_insn (insn, set_p, in_libcall_block)
2019      rtx insn;
2020      int set_p;
2021      int in_libcall_block;
2022 {
2023   rtx pat = PATTERN (insn);
2024
2025   /* Pick out the sets of INSN and for other forms of instructions record
2026      what's been modified.  */
2027
2028   if (GET_CODE (pat) == SET && ! in_libcall_block)
2029     {
2030       /* Ignore obvious no-ops.  */
2031       if (SET_SRC (pat) != SET_DEST (pat))
2032         hash_scan_set (pat, insn, set_p);
2033     }
2034   else if (GET_CODE (pat) == PARALLEL)
2035     {
2036       int i;
2037
2038       for (i = 0; i < XVECLEN (pat, 0); i++)
2039         {
2040           rtx x = XVECEXP (pat, 0, i);
2041
2042           if (GET_CODE (x) == SET)
2043             {
2044               if (GET_CODE (SET_SRC (x)) == CALL)
2045                 hash_scan_call (SET_SRC (x), insn);
2046             }
2047           else if (GET_CODE (x) == CLOBBER)
2048             hash_scan_clobber (x, insn);
2049           else if (GET_CODE (x) == CALL)
2050             hash_scan_call (x, insn);
2051         }
2052     }
2053   else if (GET_CODE (pat) == CLOBBER)
2054     hash_scan_clobber (pat, insn);
2055   else if (GET_CODE (pat) == CALL)
2056     hash_scan_call (pat, insn);
2057 }
2058
2059 static void
2060 dump_hash_table (file, name, table, table_size, total_size)
2061      FILE *file;
2062      const char *name;
2063      struct expr **table;
2064      int table_size, total_size;
2065 {
2066   int i;
2067   /* Flattened out table, so it's printed in proper order.  */
2068   struct expr **flat_table;
2069   unsigned int *hash_val;
2070
2071   flat_table
2072     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2073   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2074
2075   for (i = 0; i < table_size; i++)
2076     {
2077       struct expr *expr;
2078
2079       for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2080         {
2081           flat_table[expr->bitmap_index] = expr;
2082           hash_val[expr->bitmap_index] = i;
2083         }
2084     }
2085
2086   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2087            name, table_size, total_size);
2088
2089   for (i = 0; i < total_size; i++)
2090     {
2091       struct expr *expr = flat_table[i];
2092
2093       fprintf (file, "Index %d (hash value %d)\n  ",
2094                expr->bitmap_index, hash_val[i]);
2095       print_rtl (file, expr->expr);
2096       fprintf (file, "\n");
2097     }
2098
2099   fprintf (file, "\n");
2100
2101   /* Clean up.  */
2102   free (flat_table);
2103   free (hash_val);
2104 }
2105
2106 /* Record register first/last/block set information for REGNO in INSN.
2107    reg_first_set records the first place in the block where the register
2108    is set and is used to compute "anticipatability".
2109    reg_last_set records the last place in the block where the register
2110    is set and is used to compute "availability".
2111    reg_set_in_block records whether the register is set in the block
2112    and is used to compute "transparency".  */
2113
2114 static void
2115 record_last_reg_set_info (insn, regno)
2116      rtx insn;
2117      int regno;
2118 {
2119   if (reg_first_set[regno] == NEVER_SET)
2120     reg_first_set[regno] = INSN_CUID (insn);
2121   reg_last_set[regno] = INSN_CUID (insn);
2122   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2123 }
2124
2125 /* Record memory first/last/block set information for INSN.  */
2126
2127 static void
2128 record_last_mem_set_info (insn)
2129      rtx insn;
2130 {
2131   if (mem_first_set == NEVER_SET)
2132     mem_first_set = INSN_CUID (insn);
2133   mem_last_set = INSN_CUID (insn);
2134   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2135 }
2136
2137 /* Called from compute_hash_table via note_stores to handle one
2138    SET or CLOBBER in an insn.  DATA is really the instruction in which
2139    the SET is taking place.  */
2140
2141 static void
2142 record_last_set_info (dest, setter, data)
2143      rtx dest, setter ATTRIBUTE_UNUSED;
2144      void *data;
2145 {
2146   rtx last_set_insn = (rtx) data;
2147
2148   if (GET_CODE (dest) == SUBREG)
2149     dest = SUBREG_REG (dest);
2150
2151   if (GET_CODE (dest) == REG)
2152     record_last_reg_set_info (last_set_insn, REGNO (dest));
2153   else if (GET_CODE (dest) == MEM
2154            /* Ignore pushes, they clobber nothing.  */
2155            && ! push_operand (dest, GET_MODE (dest)))
2156     record_last_mem_set_info (last_set_insn);
2157 }
2158
2159 /* Top level function to create an expression or assignment hash table.
2160
2161    Expression entries are placed in the hash table if
2162    - they are of the form (set (pseudo-reg) src),
2163    - src is something we want to perform GCSE on,
2164    - none of the operands are subsequently modified in the block
2165
2166    Assignment entries are placed in the hash table if
2167    - they are of the form (set (pseudo-reg) src),
2168    - src is something we want to perform const/copy propagation on,
2169    - none of the operands or target are subsequently modified in the block
2170    Currently src must be a pseudo-reg or a const_int.
2171
2172    F is the first insn.
2173    SET_P is non-zero for computing the assignment hash table.  */
2174
2175 static void
2176 compute_hash_table (set_p)
2177      int set_p;
2178 {
2179   int bb;
2180
2181   /* While we compute the hash table we also compute a bit array of which
2182      registers are set in which blocks.
2183      We also compute which blocks set memory, in the absence of aliasing
2184      support [which is TODO].
2185      ??? This isn't needed during const/copy propagation, but it's cheap to
2186      compute.  Later.  */
2187   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2188   bzero ((char *) mem_set_in_block, n_basic_blocks);
2189
2190   /* Some working arrays used to track first and last set in each block.  */
2191   /* ??? One could use alloca here, but at some size a threshold is crossed
2192      beyond which one should use malloc.  Are we at that threshold here?  */
2193   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2194   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2195
2196   for (bb = 0; bb < n_basic_blocks; bb++)
2197     {
2198       rtx insn;
2199       int regno;
2200       int in_libcall_block;
2201       int i;
2202
2203       /* First pass over the instructions records information used to
2204          determine when registers and memory are first and last set.
2205          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2206          could be moved to compute_sets since they currently don't change.  */
2207
2208       for (i = 0; i < max_gcse_regno; i++)
2209         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2210       mem_first_set = NEVER_SET;
2211       mem_last_set = NEVER_SET;
2212
2213       for (insn = BLOCK_HEAD (bb);
2214            insn && insn != NEXT_INSN (BLOCK_END (bb));
2215            insn = NEXT_INSN (insn))
2216         {
2217 #ifdef NON_SAVING_SETJMP
2218           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2219               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2220             {
2221               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2222                 record_last_reg_set_info (insn, regno);
2223               continue;
2224             }
2225 #endif
2226
2227           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2228             continue;
2229
2230           if (GET_CODE (insn) == CALL_INSN)
2231             {
2232               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2233                 if ((call_used_regs[regno]
2234                      && regno != STACK_POINTER_REGNUM
2235 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2236                      && regno != HARD_FRAME_POINTER_REGNUM
2237 #endif
2238 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2239                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2240 #endif
2241 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2242                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2243 #endif
2244
2245                      && regno != FRAME_POINTER_REGNUM)
2246                     || global_regs[regno])
2247                   record_last_reg_set_info (insn, regno);
2248               if (! CONST_CALL_P (insn))
2249                 record_last_mem_set_info (insn);
2250             }
2251
2252           note_stores (PATTERN (insn), record_last_set_info, insn);
2253         }
2254
2255       /* The next pass builds the hash table.  */
2256
2257       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2258            insn && insn != NEXT_INSN (BLOCK_END (bb));
2259            insn = NEXT_INSN (insn))
2260         {
2261           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2262             {
2263               if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2264                 in_libcall_block = 1;
2265               else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2266                 in_libcall_block = 0;
2267               hash_scan_insn (insn, set_p, in_libcall_block);
2268             }
2269         }
2270     }
2271
2272   free (reg_first_set);
2273   free (reg_last_set);
2274   /* Catch bugs early.  */
2275   reg_first_set = reg_last_set = 0;
2276 }
2277
2278 /* Allocate space for the set hash table.
2279    N_INSNS is the number of instructions in the function.
2280    It is used to determine the number of buckets to use.  */
2281
2282 static void
2283 alloc_set_hash_table (n_insns)
2284      int n_insns;
2285 {
2286   int n;
2287
2288   set_hash_table_size = n_insns / 4;
2289   if (set_hash_table_size < 11)
2290     set_hash_table_size = 11;
2291   /* Attempt to maintain efficient use of hash table.
2292      Making it an odd number is simplest for now.
2293      ??? Later take some measurements.  */
2294   set_hash_table_size |= 1;
2295   n = set_hash_table_size * sizeof (struct expr *);
2296   set_hash_table = (struct expr **) gmalloc (n);
2297 }
2298
2299 /* Free things allocated by alloc_set_hash_table.  */
2300
2301 static void
2302 free_set_hash_table ()
2303 {
2304   free (set_hash_table);
2305 }
2306
2307 /* Compute the hash table for doing copy/const propagation.  */
2308
2309 static void
2310 compute_set_hash_table ()
2311 {
2312   /* Initialize count of number of entries in hash table.  */
2313   n_sets = 0;
2314   bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2315
2316   compute_hash_table (1);
2317 }
2318
2319 /* Allocate space for the expression hash table.
2320    N_INSNS is the number of instructions in the function.
2321    It is used to determine the number of buckets to use.  */
2322
2323 static void
2324 alloc_expr_hash_table (n_insns)
2325      int n_insns;
2326 {
2327   int n;
2328
2329   expr_hash_table_size = n_insns / 2;
2330   /* Make sure the amount is usable.  */
2331   if (expr_hash_table_size < 11)
2332     expr_hash_table_size = 11;
2333   /* Attempt to maintain efficient use of hash table.
2334      Making it an odd number is simplest for now.
2335      ??? Later take some measurements.  */
2336   expr_hash_table_size |= 1;
2337   n = expr_hash_table_size * sizeof (struct expr *);
2338   expr_hash_table = (struct expr **) gmalloc (n);
2339 }
2340
2341 /* Free things allocated by alloc_expr_hash_table.  */
2342
2343 static void
2344 free_expr_hash_table ()
2345 {
2346   free (expr_hash_table);
2347 }
2348
2349 /* Compute the hash table for doing GCSE.  */
2350
2351 static void
2352 compute_expr_hash_table ()
2353 {
2354   /* Initialize count of number of entries in hash table.  */
2355   n_exprs = 0;
2356   bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2357
2358   compute_hash_table (0);
2359 }
2360 \f
2361 /* Expression tracking support.  */
2362
2363 /* Lookup pattern PAT in the expression table.
2364    The result is a pointer to the table entry, or NULL if not found.  */
2365
2366 static struct expr *
2367 lookup_expr (pat)
2368      rtx pat;
2369 {
2370   int do_not_record_p;
2371   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2372                                  expr_hash_table_size);
2373   struct expr *expr;
2374
2375   if (do_not_record_p)
2376     return NULL;
2377
2378   expr = expr_hash_table[hash];
2379
2380   while (expr && ! expr_equiv_p (expr->expr, pat))
2381     expr = expr->next_same_hash;
2382
2383   return expr;
2384 }
2385
2386 /* Lookup REGNO in the set table.
2387    If PAT is non-NULL look for the entry that matches it, otherwise return
2388    the first entry for REGNO.
2389    The result is a pointer to the table entry, or NULL if not found.  */
2390
2391 static struct expr *
2392 lookup_set (regno, pat)
2393      int regno;
2394      rtx pat;
2395 {
2396   unsigned int hash = hash_set (regno, set_hash_table_size);
2397   struct expr *expr;
2398
2399   expr = set_hash_table[hash];
2400
2401   if (pat)
2402     {
2403       while (expr && ! expr_equiv_p (expr->expr, pat))
2404         expr = expr->next_same_hash;
2405     }
2406   else
2407     {
2408       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2409         expr = expr->next_same_hash;
2410     }
2411
2412   return expr;
2413 }
2414
2415 /* Return the next entry for REGNO in list EXPR.  */
2416
2417 static struct expr *
2418 next_set (regno, expr)
2419      int regno;
2420      struct expr *expr;
2421 {
2422   do
2423     expr = expr->next_same_hash;
2424   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2425   return expr;
2426 }
2427
2428 /* Reset tables used to keep track of what's still available [since the
2429    start of the block].  */
2430
2431 static void
2432 reset_opr_set_tables ()
2433 {
2434   /* Maintain a bitmap of which regs have been set since beginning of
2435      the block.  */
2436   sbitmap_zero (reg_set_bitmap);
2437   /* Also keep a record of the last instruction to modify memory.
2438      For now this is very trivial, we only record whether any memory
2439      location has been modified.  */
2440   mem_last_set = 0;
2441 }
2442
2443 /* Return non-zero if the operands of X are not set before INSN in
2444    INSN's basic block.  */
2445
2446 static int
2447 oprs_not_set_p (x, insn)
2448      rtx x, insn;
2449 {
2450   int i;
2451   enum rtx_code code;
2452   const char *fmt;
2453
2454   /* repeat is used to turn tail-recursion into iteration.  */
2455 repeat:
2456
2457   if (x == 0)
2458     return 1;
2459
2460   code = GET_CODE (x);
2461   switch (code)
2462     {
2463     case PC:
2464     case CC0:
2465     case CONST:
2466     case CONST_INT:
2467     case CONST_DOUBLE:
2468     case SYMBOL_REF:
2469     case LABEL_REF:
2470     case ADDR_VEC:
2471     case ADDR_DIFF_VEC:
2472       return 1;
2473
2474     case MEM:
2475       if (mem_last_set != 0)
2476         return 0;
2477       x = XEXP (x, 0);
2478       goto repeat;
2479
2480     case REG:
2481       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2482
2483     default:
2484       break;
2485     }
2486
2487   fmt = GET_RTX_FORMAT (code);
2488   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2489     {
2490       if (fmt[i] == 'e')
2491         {
2492           int not_set_p;
2493           /* If we are about to do the last recursive call
2494              needed at this level, change it into iteration.
2495              This function is called enough to be worth it.  */
2496           if (i == 0)
2497             {
2498               x = XEXP (x, 0);
2499               goto repeat;
2500             }
2501           not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2502           if (! not_set_p)
2503             return 0;
2504         }
2505       else if (fmt[i] == 'E')
2506         {
2507           int j;
2508           for (j = 0; j < XVECLEN (x, i); j++)
2509             {
2510               int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2511               if (! not_set_p)
2512                 return 0;
2513             }
2514         }
2515     }
2516
2517   return 1;
2518 }
2519
2520 /* Mark things set by a CALL.  */
2521
2522 static void
2523 mark_call (insn)
2524      rtx insn;
2525 {
2526   mem_last_set = INSN_CUID (insn);
2527 }
2528
2529 /* Mark things set by a SET.  */
2530
2531 static void
2532 mark_set (pat, insn)
2533      rtx pat, insn;
2534 {
2535   rtx dest = SET_DEST (pat);
2536
2537   while (GET_CODE (dest) == SUBREG
2538          || GET_CODE (dest) == ZERO_EXTRACT
2539          || GET_CODE (dest) == SIGN_EXTRACT
2540          || GET_CODE (dest) == STRICT_LOW_PART)
2541     dest = XEXP (dest, 0);
2542
2543   if (GET_CODE (dest) == REG)
2544     SET_BIT (reg_set_bitmap, REGNO (dest));
2545   else if (GET_CODE (dest) == MEM)
2546     mem_last_set = INSN_CUID (insn);
2547
2548   if (GET_CODE (SET_SRC (pat)) == CALL)
2549     mark_call (insn);
2550 }
2551
2552 /* Record things set by a CLOBBER.  */
2553
2554 static void
2555 mark_clobber (pat, insn)
2556      rtx pat, insn;
2557 {
2558   rtx clob = XEXP (pat, 0);
2559
2560   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2561     clob = XEXP (clob, 0);
2562
2563   if (GET_CODE (clob) == REG)
2564     SET_BIT (reg_set_bitmap, REGNO (clob));
2565   else
2566     mem_last_set = INSN_CUID (insn);
2567 }
2568
2569 /* Record things set by INSN.
2570    This data is used by oprs_not_set_p.  */
2571
2572 static void
2573 mark_oprs_set (insn)
2574      rtx insn;
2575 {
2576   rtx pat = PATTERN (insn);
2577
2578   if (GET_CODE (pat) == SET)
2579     mark_set (pat, insn);
2580   else if (GET_CODE (pat) == PARALLEL)
2581     {
2582       int i;
2583
2584       for (i = 0; i < XVECLEN (pat, 0); i++)
2585         {
2586           rtx x = XVECEXP (pat, 0, i);
2587
2588           if (GET_CODE (x) == SET)
2589             mark_set (x, insn);
2590           else if (GET_CODE (x) == CLOBBER)
2591             mark_clobber (x, insn);
2592           else if (GET_CODE (x) == CALL)
2593             mark_call (insn);
2594         }
2595     }
2596   else if (GET_CODE (pat) == CLOBBER)
2597     mark_clobber (pat, insn);
2598   else if (GET_CODE (pat) == CALL)
2599     mark_call (insn);
2600 }
2601
2602 \f
2603 /* Classic GCSE reaching definition support.  */
2604
2605 /* Allocate reaching def variables.  */
2606
2607 static void
2608 alloc_rd_mem (n_blocks, n_insns)
2609      int n_blocks, n_insns;
2610 {
2611   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2612   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2613
2614   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2615   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2616
2617   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2618   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2619
2620   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2621   sbitmap_vector_zero (rd_out, n_basic_blocks);
2622 }
2623
2624 /* Free reaching def variables.  */
2625
2626 static void
2627 free_rd_mem ()
2628 {
2629   free (rd_kill);
2630   free (rd_gen);
2631   free (reaching_defs);
2632   free (rd_out);
2633 }
2634
2635 /* Add INSN to the kills of BB.
2636    REGNO, set in BB, is killed by INSN.  */
2637
2638 static void
2639 handle_rd_kill_set (insn, regno, bb)
2640      rtx insn;
2641      int regno, bb;
2642 {
2643   struct reg_set *this_reg = reg_set_table[regno];
2644
2645   while (this_reg)
2646     {
2647       if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2648         SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2649       this_reg = this_reg->next;
2650     }
2651 }
2652
2653 /* Compute the set of kill's for reaching definitions.  */
2654
2655 static void
2656 compute_kill_rd ()
2657 {
2658   int bb,cuid;
2659
2660   /* For each block
2661        For each set bit in `gen' of the block (i.e each insn which
2662            generates a definition in the block)
2663          Call the reg set by the insn corresponding to that bit regx
2664          Look at the linked list starting at reg_set_table[regx]
2665          For each setting of regx in the linked list, which is not in
2666              this block
2667            Set the bit in `kill' corresponding to that insn
2668     */
2669
2670   for (bb = 0; bb < n_basic_blocks; bb++)
2671     {
2672       for (cuid = 0; cuid < max_cuid; cuid++)
2673         {
2674           if (TEST_BIT (rd_gen[bb], cuid))
2675             {
2676               rtx insn = CUID_INSN (cuid);
2677               rtx pat = PATTERN (insn);
2678
2679               if (GET_CODE (insn) == CALL_INSN)
2680                 {
2681                   int regno;
2682
2683                   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2684                     {
2685                       if ((call_used_regs[regno]
2686                            && regno != STACK_POINTER_REGNUM
2687 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2688                            && regno != HARD_FRAME_POINTER_REGNUM
2689 #endif
2690 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2691                            && ! (regno == ARG_POINTER_REGNUM
2692                                  && fixed_regs[regno])
2693 #endif
2694 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2695                            && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2696 #endif
2697                            && regno != FRAME_POINTER_REGNUM)
2698                           || global_regs[regno])
2699                         handle_rd_kill_set (insn, regno, bb);
2700                     }
2701                 }
2702
2703               if (GET_CODE (pat) == PARALLEL)
2704                 {
2705                   int i;
2706
2707                   /* We work backwards because ... */
2708                   for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2709                     {
2710                       enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2711                       if ((code == SET || code == CLOBBER)
2712                           && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2713                         handle_rd_kill_set (insn,
2714                                             REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2715                                             bb);
2716                     }
2717                 }
2718               else if (GET_CODE (pat) == SET)
2719                 {
2720                   if (GET_CODE (SET_DEST (pat)) == REG)
2721                     {
2722                       /* Each setting of this register outside of this block
2723                          must be marked in the set of kills in this block.  */
2724                       handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2725                     }
2726                 }
2727               /* FIXME: CLOBBER? */
2728             }
2729         }
2730     }
2731 }
2732
2733 /* Compute the reaching definitions as in
2734    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2735    Chapter 10.  It is the same algorithm as used for computing available
2736    expressions but applied to the gens and kills of reaching definitions.  */
2737
2738 static void
2739 compute_rd ()
2740 {
2741   int bb, changed, passes;
2742
2743   for (bb = 0; bb < n_basic_blocks; bb++)
2744     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2745
2746   passes = 0;
2747   changed = 1;
2748   while (changed)
2749     {
2750       changed = 0;
2751       for (bb = 0; bb < n_basic_blocks; bb++)
2752         {
2753           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2754           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2755                                             reaching_defs[bb], rd_kill[bb]);
2756         }
2757       passes++;
2758     }
2759
2760   if (gcse_file)
2761     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2762 }
2763 \f
2764 /* Classic GCSE available expression support.  */
2765
2766 /* Allocate memory for available expression computation.  */
2767
2768 static void
2769 alloc_avail_expr_mem (n_blocks, n_exprs)
2770      int n_blocks, n_exprs;
2771 {
2772   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2773   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2774
2775   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2776   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2777
2778   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2779   sbitmap_vector_zero (ae_in, n_basic_blocks);
2780
2781   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2782   sbitmap_vector_zero (ae_out, n_basic_blocks);
2783
2784   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2785   sbitmap_ones (u_bitmap);
2786 }
2787
2788 static void
2789 free_avail_expr_mem ()
2790 {
2791   free (ae_kill);
2792   free (ae_gen);
2793   free (ae_in);
2794   free (ae_out);
2795   free (u_bitmap);
2796 }
2797
2798 /* Compute the set of available expressions generated in each basic block.  */
2799
2800 static void
2801 compute_ae_gen ()
2802 {
2803   int i;
2804
2805   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2806      This is all we have to do because an expression is not recorded if it
2807      is not available, and the only expressions we want to work with are the
2808      ones that are recorded.  */
2809
2810   for (i = 0; i < expr_hash_table_size; i++)
2811     {
2812       struct expr *expr = expr_hash_table[i];
2813       while (expr != NULL)
2814         {
2815           struct occr *occr = expr->avail_occr;
2816           while (occr != NULL)
2817             {
2818               SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2819               occr = occr->next;
2820             }
2821           expr = expr->next_same_hash;
2822         }
2823     }
2824 }
2825
2826 /* Return non-zero if expression X is killed in BB.  */
2827
2828 static int
2829 expr_killed_p (x, bb)
2830      rtx x;
2831      int bb;
2832 {
2833   int i;
2834   enum rtx_code code;
2835   const char *fmt;
2836
2837   /* repeat is used to turn tail-recursion into iteration.  */
2838  repeat:
2839
2840   if (x == 0)
2841     return 1;
2842
2843   code = GET_CODE (x);
2844   switch (code)
2845     {
2846     case REG:
2847       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2848
2849     case MEM:
2850       if (mem_set_in_block[bb])
2851         return 1;
2852       x = XEXP (x, 0);
2853       goto repeat;
2854
2855     case PC:
2856     case CC0: /*FIXME*/
2857     case CONST:
2858     case CONST_INT:
2859     case CONST_DOUBLE:
2860     case SYMBOL_REF:
2861     case LABEL_REF:
2862     case ADDR_VEC:
2863     case ADDR_DIFF_VEC:
2864       return 0;
2865
2866     default:
2867       break;
2868     }
2869
2870   i = GET_RTX_LENGTH (code) - 1;
2871   fmt = GET_RTX_FORMAT (code);
2872   for (; i >= 0; i--)
2873     {
2874       if (fmt[i] == 'e')
2875         {
2876           rtx tem = XEXP (x, i);
2877
2878           /* If we are about to do the last recursive call
2879              needed at this level, change it into iteration.
2880              This function is called enough to be worth it.  */
2881           if (i == 0)
2882             {
2883               x = tem;
2884               goto repeat;
2885             }
2886           if (expr_killed_p (tem, bb))
2887             return 1;
2888         }
2889       else if (fmt[i] == 'E')
2890         {
2891           int j;
2892           for (j = 0; j < XVECLEN (x, i); j++)
2893             {
2894               if (expr_killed_p (XVECEXP (x, i, j), bb))
2895                 return 1;
2896             }
2897         }
2898     }
2899
2900   return 0;
2901 }
2902
2903 /* Compute the set of available expressions killed in each basic block.  */
2904
2905 static void
2906 compute_ae_kill (ae_gen, ae_kill)
2907      sbitmap *ae_gen, *ae_kill;
2908 {
2909   int bb,i;
2910
2911   for (bb = 0; bb < n_basic_blocks; bb++)
2912     {
2913       for (i = 0; i < expr_hash_table_size; i++)
2914         {
2915           struct expr *expr = expr_hash_table[i];
2916
2917           for ( ; expr != NULL; expr = expr->next_same_hash)
2918             {
2919               /* Skip EXPR if generated in this block.  */
2920               if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2921                 continue;
2922
2923               if (expr_killed_p (expr->expr, bb))
2924                 SET_BIT (ae_kill[bb], expr->bitmap_index);
2925             }
2926         }
2927     }
2928 }
2929 \f
2930 /* Actually perform the Classic GCSE optimizations.  */
2931
2932 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2933
2934    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2935    as a positive reach.  We want to do this when there are two computations
2936    of the expression in the block.
2937
2938    VISITED is a pointer to a working buffer for tracking which BB's have
2939    been visited.  It is NULL for the top-level call.
2940
2941    We treat reaching expressions that go through blocks containing the same
2942    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2943    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2944    2 as not reaching.  The intent is to improve the probability of finding
2945    only one reaching expression and to reduce register lifetimes by picking
2946    the closest such expression.  */
2947
2948 static int
2949 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2950      struct occr *occr;
2951      struct expr *expr;
2952      int bb;
2953      int check_self_loop;
2954      char *visited;
2955 {
2956   edge pred;
2957
2958   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2959     {
2960       int pred_bb = pred->src->index;
2961
2962       if (visited[pred_bb])
2963         {
2964           /* This predecessor has already been visited.
2965              Nothing to do.  */
2966           ;
2967         }
2968       else if (pred_bb == bb)
2969         {
2970           /* BB loops on itself.  */
2971           if (check_self_loop
2972               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2973               && BLOCK_NUM (occr->insn) == pred_bb)
2974             return 1;
2975           visited[pred_bb] = 1;
2976         }
2977       /* Ignore this predecessor if it kills the expression.  */
2978       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2979         visited[pred_bb] = 1;
2980       /* Does this predecessor generate this expression?  */
2981       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2982         {
2983           /* Is this the occurrence we're looking for?
2984              Note that there's only one generating occurrence per block
2985              so we just need to check the block number.  */
2986           if (BLOCK_NUM (occr->insn) == pred_bb)
2987             return 1;
2988           visited[pred_bb] = 1;
2989         }
2990       /* Neither gen nor kill.  */
2991       else
2992         {
2993           visited[pred_bb] = 1;
2994           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2995               visited))
2996             return 1;
2997         }
2998     }
2999
3000   /* All paths have been checked.  */
3001   return 0;
3002 }
3003
3004 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3005    memory allocated for that function is returned. */
3006
3007 static int
3008 expr_reaches_here_p (occr, expr, bb, check_self_loop)
3009      struct occr *occr;
3010      struct expr *expr;
3011      int bb;
3012      int check_self_loop;
3013 {
3014   int rval;
3015   char * visited = (char *) xcalloc (n_basic_blocks, 1);
3016
3017   rval = expr_reaches_here_p_work(occr, expr, bb, check_self_loop, visited);
3018
3019   free (visited);
3020
3021   return (rval);
3022 }
3023
3024 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3025    If there is more than one such instruction, return NULL.
3026
3027    Called only by handle_avail_expr.  */
3028
3029 static rtx
3030 computing_insn (expr, insn)
3031      struct expr *expr;
3032      rtx insn;
3033 {
3034   int bb = BLOCK_NUM (insn);
3035
3036   if (expr->avail_occr->next == NULL)
3037     {
3038       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
3039         {
3040           /* The available expression is actually itself
3041              (i.e. a loop in the flow graph) so do nothing.  */
3042           return NULL;
3043         }
3044       /* (FIXME) Case that we found a pattern that was created by
3045          a substitution that took place.  */
3046       return expr->avail_occr->insn;
3047     }
3048   else
3049     {
3050       /* Pattern is computed more than once.
3051          Search backwards from this insn to see how many of these
3052          computations actually reach this insn.  */
3053       struct occr *occr;
3054       rtx insn_computes_expr = NULL;
3055       int can_reach = 0;
3056
3057       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3058         {
3059           if (BLOCK_NUM (occr->insn) == bb)
3060             {
3061               /* The expression is generated in this block.
3062                  The only time we care about this is when the expression
3063                  is generated later in the block [and thus there's a loop].
3064                  We let the normal cse pass handle the other cases.  */
3065               if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3066                 {
3067                   if (expr_reaches_here_p (occr, expr, bb, 1))
3068                     {
3069                       can_reach++;
3070                       if (can_reach > 1)
3071                         return NULL;
3072                       insn_computes_expr = occr->insn;
3073                     }
3074                 }
3075             }
3076           else /* Computation of the pattern outside this block.  */
3077             {
3078               if (expr_reaches_here_p (occr, expr, bb, 0))
3079                 {
3080                   can_reach++;
3081                   if (can_reach > 1)
3082                     return NULL;
3083                   insn_computes_expr = occr->insn;
3084                 }
3085             }
3086         }
3087
3088       if (insn_computes_expr == NULL)
3089         abort ();
3090       return insn_computes_expr;
3091     }
3092 }
3093
3094 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3095    Only called by can_disregard_other_sets.  */
3096
3097 static int
3098 def_reaches_here_p (insn, def_insn)
3099      rtx insn, def_insn;
3100 {
3101   rtx reg;
3102
3103   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3104     return 1;
3105
3106   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3107     {
3108       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3109         {
3110           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3111             return 1;
3112           if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3113             reg = XEXP (PATTERN (def_insn), 0);
3114           else if (GET_CODE (PATTERN (def_insn)) == SET)
3115             reg = SET_DEST (PATTERN (def_insn));
3116           else
3117             abort ();
3118           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3119         }
3120       else
3121         return 0;
3122     }
3123
3124   return 0;
3125 }
3126
3127 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3128    The value returned is the number of definitions that reach INSN.
3129    Returning a value of zero means that [maybe] more than one definition
3130    reaches INSN and the caller can't perform whatever optimization it is
3131    trying.  i.e. it is always safe to return zero.  */
3132
3133 static int
3134 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3135      struct reg_set **addr_this_reg;
3136      rtx insn;
3137      int for_combine;
3138 {
3139   int number_of_reaching_defs = 0;
3140   struct reg_set *this_reg = *addr_this_reg;
3141
3142   while (this_reg)
3143     {
3144       if (def_reaches_here_p (insn, this_reg->insn))
3145         {
3146           number_of_reaching_defs++;
3147           /* Ignore parallels for now.  */
3148           if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3149             return 0;
3150           if (!for_combine
3151               && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3152                   || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3153                                     SET_SRC (PATTERN (insn)))))
3154             {
3155               /* A setting of the reg to a different value reaches INSN.  */
3156               return 0;
3157             }
3158           if (number_of_reaching_defs > 1)
3159             {
3160               /* If in this setting the value the register is being
3161                  set to is equal to the previous value the register
3162                  was set to and this setting reaches the insn we are
3163                  trying to do the substitution on then we are ok.  */
3164
3165               if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3166                 return 0;
3167               if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3168                                  SET_SRC (PATTERN (insn))))
3169                 return 0;
3170             }
3171           *addr_this_reg = this_reg;
3172         }
3173
3174       /* prev_this_reg = this_reg; */
3175       this_reg = this_reg->next;
3176     }
3177
3178   return number_of_reaching_defs;
3179 }
3180
3181 /* Expression computed by insn is available and the substitution is legal,
3182    so try to perform the substitution.
3183
3184    The result is non-zero if any changes were made.  */
3185
3186 static int
3187 handle_avail_expr (insn, expr)
3188      rtx insn;
3189      struct expr *expr;
3190 {
3191   rtx pat, insn_computes_expr;
3192   rtx to;
3193   struct reg_set *this_reg;
3194   int found_setting, use_src;
3195   int changed = 0;
3196
3197   /* We only handle the case where one computation of the expression
3198      reaches this instruction.  */
3199   insn_computes_expr = computing_insn (expr, insn);
3200   if (insn_computes_expr == NULL)
3201     return 0;
3202
3203   found_setting = 0;
3204   use_src = 0;
3205
3206   /* At this point we know only one computation of EXPR outside of this
3207      block reaches this insn.  Now try to find a register that the
3208      expression is computed into.  */
3209
3210   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3211     {
3212       /* This is the case when the available expression that reaches
3213          here has already been handled as an available expression.  */
3214       int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3215       /* If the register was created by GCSE we can't use `reg_set_table',
3216          however we know it's set only once.  */
3217       if (regnum_for_replacing >= max_gcse_regno
3218           /* If the register the expression is computed into is set only once,
3219              or only one set reaches this insn, we can use it.  */
3220           || (((this_reg = reg_set_table[regnum_for_replacing]),
3221                this_reg->next == NULL)
3222               || can_disregard_other_sets (&this_reg, insn, 0)))
3223        {
3224          use_src = 1;
3225          found_setting = 1;
3226        }
3227     }
3228
3229   if (!found_setting)
3230     {
3231       int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3232       /* This shouldn't happen.  */
3233       if (regnum_for_replacing >= max_gcse_regno)
3234         abort ();
3235       this_reg = reg_set_table[regnum_for_replacing];
3236       /* If the register the expression is computed into is set only once,
3237          or only one set reaches this insn, use it.  */
3238       if (this_reg->next == NULL
3239           || can_disregard_other_sets (&this_reg, insn, 0))
3240         found_setting = 1;
3241     }
3242
3243   if (found_setting)
3244     {
3245       pat = PATTERN (insn);
3246       if (use_src)
3247         to = SET_SRC (PATTERN (insn_computes_expr));
3248       else
3249         to = SET_DEST (PATTERN (insn_computes_expr));
3250       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3251
3252       /* We should be able to ignore the return code from validate_change but
3253          to play it safe we check.  */
3254       if (changed)
3255         {
3256           gcse_subst_count++;
3257           if (gcse_file != NULL)
3258             {
3259               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3260                        INSN_UID (insn), REGNO (to),
3261                        use_src ? "from" : "set in",
3262                        INSN_UID (insn_computes_expr));
3263             }
3264
3265         }
3266     }
3267   /* The register that the expr is computed into is set more than once.  */
3268   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3269     {
3270       /* Insert an insn after insnx that copies the reg set in insnx
3271          into a new pseudo register call this new register REGN.
3272          From insnb until end of basic block or until REGB is set
3273          replace all uses of REGB with REGN.  */
3274       rtx new_insn;
3275
3276       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3277
3278       /* Generate the new insn.  */
3279       /* ??? If the change fails, we return 0, even though we created
3280          an insn.  I think this is ok.  */
3281       new_insn
3282         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3283                                         SET_DEST (PATTERN (insn_computes_expr))),
3284                                   insn_computes_expr);
3285       /* Keep block number table up to date.  */
3286       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3287       /* Keep register set table up to date.  */
3288       record_one_set (REGNO (to), new_insn);
3289
3290       gcse_create_count++;
3291       if (gcse_file != NULL)
3292         {
3293           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3294                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3295                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3296                    INSN_UID (insn_computes_expr));
3297           fprintf (gcse_file, "      into newly allocated reg %d\n", REGNO (to));
3298         }
3299
3300       pat = PATTERN (insn);
3301
3302       /* Do register replacement for INSN.  */
3303       changed = validate_change (insn, &SET_SRC (pat),
3304                                  SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3305                                  0);
3306
3307       /* We should be able to ignore the return code from validate_change but
3308          to play it safe we check.  */
3309       if (changed)
3310         {
3311           gcse_subst_count++;
3312           if (gcse_file != NULL)
3313             {
3314               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3315                        INSN_UID (insn),
3316                        REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3317                        INSN_UID (insn_computes_expr));
3318             }
3319
3320         }
3321     }
3322
3323   return changed;
3324 }
3325
3326 /* Perform classic GCSE.
3327    This is called by one_classic_gcse_pass after all the dataflow analysis
3328    has been done.
3329
3330    The result is non-zero if a change was made.  */
3331
3332 static int
3333 classic_gcse ()
3334 {
3335   int bb, changed;
3336   rtx insn;
3337
3338   /* Note we start at block 1.  */
3339
3340   changed = 0;
3341   for (bb = 1; bb < n_basic_blocks; bb++)
3342     {
3343       /* Reset tables used to keep track of what's still valid [since the
3344          start of the block].  */
3345       reset_opr_set_tables ();
3346
3347       for (insn = BLOCK_HEAD (bb);
3348            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3349            insn = NEXT_INSN (insn))
3350         {
3351           /* Is insn of form (set (pseudo-reg) ...)?  */
3352
3353           if (GET_CODE (insn) == INSN
3354               && GET_CODE (PATTERN (insn)) == SET
3355               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3356               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3357             {
3358               rtx pat = PATTERN (insn);
3359               rtx src = SET_SRC (pat);
3360               struct expr *expr;
3361
3362               if (want_to_gcse_p (src)
3363                   /* Is the expression recorded?  */
3364                   && ((expr = lookup_expr (src)) != NULL)
3365                   /* Is the expression available [at the start of the
3366                      block]?  */
3367                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3368                   /* Are the operands unchanged since the start of the
3369                      block?  */
3370                   && oprs_not_set_p (src, insn))
3371                 changed |= handle_avail_expr (insn, expr);
3372             }
3373
3374           /* Keep track of everything modified by this insn.  */
3375           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3376           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3377             mark_oprs_set (insn);
3378         }
3379     }
3380
3381   return changed;
3382 }
3383
3384 /* Top level routine to perform one classic GCSE pass.
3385
3386    Return non-zero if a change was made.  */
3387
3388 static int
3389 one_classic_gcse_pass (pass)
3390      int pass;
3391 {
3392   int changed = 0;
3393
3394   gcse_subst_count = 0;
3395   gcse_create_count = 0;
3396
3397   alloc_expr_hash_table (max_cuid);
3398   alloc_rd_mem (n_basic_blocks, max_cuid);
3399   compute_expr_hash_table ();
3400   if (gcse_file)
3401     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3402                      expr_hash_table_size, n_exprs);
3403   if (n_exprs > 0)
3404     {
3405       compute_kill_rd ();
3406       compute_rd ();
3407       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3408       compute_ae_gen ();
3409       compute_ae_kill (ae_gen, ae_kill);
3410       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3411       changed = classic_gcse ();
3412       free_avail_expr_mem ();
3413     }
3414   free_rd_mem ();
3415   free_expr_hash_table ();
3416
3417   if (gcse_file)
3418     {
3419       fprintf (gcse_file, "\n");
3420       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3421                current_function_name, pass,
3422                bytes_used, gcse_subst_count, gcse_create_count);
3423     }
3424
3425   return changed;
3426 }
3427 \f
3428 /* Compute copy/constant propagation working variables.  */
3429
3430 /* Local properties of assignments.  */
3431
3432 static sbitmap *cprop_pavloc;
3433 static sbitmap *cprop_absaltered;
3434
3435 /* Global properties of assignments (computed from the local properties).  */
3436
3437 static sbitmap *cprop_avin;
3438 static sbitmap *cprop_avout;
3439
3440 /* Allocate vars used for copy/const propagation.
3441    N_BLOCKS is the number of basic blocks.
3442    N_SETS is the number of sets.  */
3443
3444 static void
3445 alloc_cprop_mem (n_blocks, n_sets)
3446      int n_blocks, n_sets;
3447 {
3448   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3449   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3450
3451   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3452   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3453 }
3454
3455 /* Free vars used by copy/const propagation.  */
3456
3457 static void
3458 free_cprop_mem ()
3459 {
3460   free (cprop_pavloc);
3461   free (cprop_absaltered);
3462   free (cprop_avin);
3463   free (cprop_avout);
3464 }
3465
3466 /* For each block, compute whether X is transparent.
3467    X is either an expression or an assignment [though we don't care which,
3468    for this context an assignment is treated as an expression].
3469    For each block where an element of X is modified, set (SET_P == 1) or reset
3470    (SET_P == 0) the INDX bit in BMAP.  */
3471
3472 static void
3473 compute_transp (x, indx, bmap, set_p)
3474      rtx x;
3475      int indx;
3476      sbitmap *bmap;
3477      int set_p;
3478 {
3479   int bb,i;
3480   enum rtx_code code;
3481   const char *fmt;
3482
3483   /* repeat is used to turn tail-recursion into iteration.  */
3484  repeat:
3485
3486   if (x == 0)
3487     return;
3488
3489   code = GET_CODE (x);
3490   switch (code)
3491     {
3492     case REG:
3493       {
3494         reg_set *r;
3495         int regno = REGNO (x);
3496
3497         if (set_p)
3498           {
3499             if (regno < FIRST_PSEUDO_REGISTER)
3500               {
3501                 for (bb = 0; bb < n_basic_blocks; bb++)
3502                   if (TEST_BIT (reg_set_in_block[bb], regno))
3503                     SET_BIT (bmap[bb], indx);
3504               }
3505             else
3506               {
3507                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3508                   {
3509                     bb = BLOCK_NUM (r->insn);
3510                     SET_BIT (bmap[bb], indx);
3511                   }
3512               }
3513           }
3514         else
3515           {
3516             if (regno < FIRST_PSEUDO_REGISTER)
3517               {
3518                 for (bb = 0; bb < n_basic_blocks; bb++)
3519                   if (TEST_BIT (reg_set_in_block[bb], regno))
3520                     RESET_BIT (bmap[bb], indx);
3521               }
3522             else
3523               {
3524                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3525                   {
3526                     bb = BLOCK_NUM (r->insn);
3527                     RESET_BIT (bmap[bb], indx);
3528                   }
3529               }
3530           }
3531         return;
3532       }
3533
3534     case MEM:
3535       if (set_p)
3536         {
3537           for (bb = 0; bb < n_basic_blocks; bb++)
3538             if (mem_set_in_block[bb])
3539               SET_BIT (bmap[bb], indx);
3540         }
3541       else
3542         {
3543           for (bb = 0; bb < n_basic_blocks; bb++)
3544             if (mem_set_in_block[bb])
3545               RESET_BIT (bmap[bb], indx);
3546         }
3547       x = XEXP (x, 0);
3548       goto repeat;
3549
3550     case PC:
3551     case CC0: /*FIXME*/
3552     case CONST:
3553     case CONST_INT:
3554     case CONST_DOUBLE:
3555     case SYMBOL_REF:
3556     case LABEL_REF:
3557     case ADDR_VEC:
3558     case ADDR_DIFF_VEC:
3559       return;
3560
3561     default:
3562       break;
3563     }
3564
3565   i = GET_RTX_LENGTH (code) - 1;
3566   fmt = GET_RTX_FORMAT (code);
3567   for (; i >= 0; i--)
3568     {
3569       if (fmt[i] == 'e')
3570         {
3571           rtx tem = XEXP (x, i);
3572
3573           /* If we are about to do the last recursive call
3574              needed at this level, change it into iteration.
3575              This function is called enough to be worth it.  */
3576           if (i == 0)
3577             {
3578               x = tem;
3579               goto repeat;
3580             }
3581           compute_transp (tem, indx, bmap, set_p);
3582         }
3583       else if (fmt[i] == 'E')
3584         {
3585           int j;
3586           for (j = 0; j < XVECLEN (x, i); j++)
3587             compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3588         }
3589     }
3590 }
3591
3592 /* Top level routine to do the dataflow analysis needed by copy/const
3593    propagation.  */
3594
3595 static void
3596 compute_cprop_data ()
3597 {
3598   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3599   compute_available (cprop_pavloc, cprop_absaltered,
3600                      cprop_avout, cprop_avin);
3601 }
3602 \f
3603 /* Copy/constant propagation.  */
3604
3605 /* Maximum number of register uses in an insn that we handle.  */
3606 #define MAX_USES 8
3607
3608 /* Table of uses found in an insn.
3609    Allocated statically to avoid alloc/free complexity and overhead.  */
3610 static struct reg_use reg_use_table[MAX_USES];
3611
3612 /* Index into `reg_use_table' while building it.  */
3613 static int reg_use_count;
3614
3615 /* Set up a list of register numbers used in INSN.
3616    The found uses are stored in `reg_use_table'.
3617    `reg_use_count' is initialized to zero before entry, and
3618    contains the number of uses in the table upon exit.
3619
3620    ??? If a register appears multiple times we will record it multiple
3621    times.  This doesn't hurt anything but it will slow things down.  */
3622
3623 static void
3624 find_used_regs (x)
3625      rtx x;
3626 {
3627   int i;
3628   enum rtx_code code;
3629   const char *fmt;
3630
3631   /* repeat is used to turn tail-recursion into iteration.  */
3632  repeat:
3633
3634   if (x == 0)
3635     return;
3636
3637   code = GET_CODE (x);
3638   switch (code)
3639     {
3640     case REG:
3641       if (reg_use_count == MAX_USES)
3642         return;
3643       reg_use_table[reg_use_count].reg_rtx = x;
3644       reg_use_count++;
3645       return;
3646
3647     case MEM:
3648       x = XEXP (x, 0);
3649       goto repeat;
3650
3651     case PC:
3652     case CC0:
3653     case CONST:
3654     case CONST_INT:
3655     case CONST_DOUBLE:
3656     case SYMBOL_REF:
3657     case LABEL_REF:
3658     case CLOBBER:
3659     case ADDR_VEC:
3660     case ADDR_DIFF_VEC:
3661     case ASM_INPUT: /*FIXME*/
3662       return;
3663
3664     case SET:
3665       if (GET_CODE (SET_DEST (x)) == MEM)
3666         find_used_regs (SET_DEST (x));
3667       x = SET_SRC (x);
3668       goto repeat;
3669
3670     default:
3671       break;
3672     }
3673
3674   /* Recursively scan the operands of this expression.  */
3675
3676   fmt = GET_RTX_FORMAT (code);
3677   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3678     {
3679       if (fmt[i] == 'e')
3680         {
3681           /* If we are about to do the last recursive call
3682              needed at this level, change it into iteration.
3683              This function is called enough to be worth it.  */
3684           if (i == 0)
3685             {
3686               x = XEXP (x, 0);
3687               goto repeat;
3688             }
3689           find_used_regs (XEXP (x, i));
3690         }
3691       else if (fmt[i] == 'E')
3692         {
3693           int j;
3694           for (j = 0; j < XVECLEN (x, i); j++)
3695             find_used_regs (XVECEXP (x, i, j));
3696         }
3697     }
3698 }
3699
3700 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3701    Returns non-zero is successful.  */
3702
3703 static int
3704 try_replace_reg (from, to, insn)
3705      rtx from, to, insn;
3706 {
3707   rtx note;
3708   rtx src;
3709   int success;
3710   rtx set;
3711
3712   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3713
3714   if (!note)
3715     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3716
3717   /* If this fails we could try to simplify the result of the
3718      replacement and attempt to recognize the simplified insn.
3719
3720      But we need a general simplify_rtx that doesn't have pass
3721      specific state variables.  I'm not aware of one at the moment.  */
3722
3723
3724   success = validate_replace_src (from, to, insn);
3725   set = single_set (insn);
3726
3727   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3728      information.  */
3729   if (!success && !note)
3730     {
3731       if (!set)
3732         return 0;
3733       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3734                                                    copy_rtx (SET_SRC (set)),
3735                                                    REG_NOTES (insn));
3736     }
3737
3738   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3739      try to simplify them.  */
3740   if (note)
3741     {
3742       rtx simplified;
3743       src = XEXP (note, 0);
3744       replace_rtx (src, from, to);
3745
3746       /* Try to simplify resulting note. */
3747       simplified = simplify_rtx (src);
3748       if (simplified)
3749         {
3750           src = simplified;
3751           XEXP (note, 0) = src;
3752         }
3753
3754       /* REG_EQUAL may get simplified into register.
3755          We don't allow that. Remove that note. This code ought
3756          not to hapen, because previous code ought to syntetize
3757          reg-reg move, but be on the safe side.  */
3758       else if (REG_P (src))
3759         remove_note (insn, note);
3760     }
3761   return success;
3762 }
3763 /* Find a set of REGNO that is available on entry to INSN's block.
3764    Returns NULL if not found.  */
3765
3766 static struct expr *
3767 find_avail_set (regno, insn)
3768      int regno;
3769      rtx insn;
3770 {
3771   /* SET1 contains the last set found that can be returned to the caller for
3772      use in a substitution.  */
3773   struct expr *set1 = 0;
3774
3775   /* Loops are not possible here.  To get a loop we would need two sets
3776      available at the start of the block containing INSN.  ie we would
3777      need two sets like this available at the start of the block:
3778
3779        (set (reg X) (reg Y))
3780        (set (reg Y) (reg X))
3781
3782      This can not happen since the set of (reg Y) would have killed the
3783      set of (reg X) making it unavailable at the start of this block.  */
3784   while (1)
3785      {
3786       rtx src;
3787       struct expr *set = lookup_set (regno, NULL_RTX);
3788
3789       /* Find a set that is available at the start of the block
3790          which contains INSN.  */
3791       while (set)
3792         {
3793           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3794             break;
3795           set = next_set (regno, set);
3796         }
3797
3798       /* If no available set was found we've reached the end of the
3799          (possibly empty) copy chain.  */
3800       if (set == 0)
3801         break;
3802
3803       if (GET_CODE (set->expr) != SET)
3804         abort ();
3805
3806       src = SET_SRC (set->expr);
3807
3808       /* We know the set is available.
3809          Now check that SRC is ANTLOC (i.e. none of the source operands
3810          have changed since the start of the block).
3811
3812          If the source operand changed, we may still use it for the next
3813          iteration of this loop, but we may not use it for substitutions.  */
3814       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3815         set1 = set;
3816
3817       /* If the source of the set is anything except a register, then
3818          we have reached the end of the copy chain.  */
3819       if (GET_CODE (src) != REG)
3820         break;
3821
3822       /* Follow the copy chain, ie start another iteration of the loop
3823          and see if we have an available copy into SRC.  */
3824       regno = REGNO (src);
3825      }
3826
3827   /* SET1 holds the last set that was available and anticipatable at
3828      INSN.  */
3829   return set1;
3830 }
3831
3832 /* Subroutine of cprop_insn that tries to propagate constants into
3833    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3834    that we can use for substitutions.
3835    REG_USED is the use we will try to replace, SRC is the constant we
3836    will try to substitute for it.
3837    Returns nonzero if a change was made.  */
3838 static int
3839 cprop_jump (insn, copy, reg_used, src)
3840      rtx insn, copy;
3841      struct reg_use *reg_used;
3842      rtx src;
3843 {
3844   rtx set = PATTERN (copy);
3845   rtx temp;
3846
3847   /* Replace the register with the appropriate constant.  */
3848   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3849
3850   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3851                                      GET_MODE (SET_SRC (set)),
3852                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3853                                      XEXP (SET_SRC (set), 0),
3854                                      XEXP (SET_SRC (set), 1),
3855                                      XEXP (SET_SRC (set), 2));
3856
3857   /* If no simplification can be made, then try the next
3858      register.  */
3859   if (temp == 0)
3860     return 0;
3861
3862   SET_SRC (set) = temp;
3863
3864   /* That may have changed the structure of TEMP, so
3865      force it to be rerecognized if it has not turned
3866      into a nop or unconditional jump.  */
3867
3868   INSN_CODE (copy) = -1;
3869   if ((SET_DEST (set) == pc_rtx
3870        && (SET_SRC (set) == pc_rtx
3871            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3872       || recog (PATTERN (copy), copy, NULL) >= 0)
3873     {
3874       /* This has either become an unconditional jump
3875          or a nop-jump.  We'd like to delete nop jumps
3876          here, but doing so confuses gcse.  So we just
3877          make the replacement and let later passes
3878          sort things out.  */
3879       PATTERN (insn) = set;
3880       INSN_CODE (insn) = -1;
3881
3882       /* One less use of the label this insn used to jump to
3883          if we turned this into a NOP jump.  */
3884       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3885         --LABEL_NUSES (JUMP_LABEL (insn));
3886
3887       /* If this has turned into an unconditional jump,
3888          then put a barrier after it so that the unreachable
3889          code will be deleted.  */
3890       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3891         emit_barrier_after (insn);
3892
3893       run_jump_opt_after_gcse = 1;
3894
3895       const_prop_count++;
3896       if (gcse_file != NULL)
3897         {
3898           int regno = REGNO (reg_used->reg_rtx);
3899           fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3900                    regno, INSN_UID (insn));
3901           print_rtl (gcse_file, src);
3902           fprintf (gcse_file, "\n");
3903         }
3904       return 1;
3905     }
3906   return 0;
3907 }
3908
3909 #ifdef HAVE_cc0
3910 /* Subroutine of cprop_insn that tries to propagate constants into
3911    JUMP_INSNS for machines that have CC0.  INSN is a single set that
3912    stores into CC0; the insn following it is a conditional jump.
3913    REG_USED is the use we will try to replace, SRC is the constant we
3914    will try to substitute for it.
3915    Returns nonzero if a change was made.  */
3916 static int
3917 cprop_cc0_jump (insn, reg_used, src)
3918      rtx insn;
3919      struct reg_use *reg_used;
3920      rtx src;
3921 {
3922   rtx jump = NEXT_INSN (insn);
3923   rtx copy = copy_rtx (jump);
3924   rtx set = PATTERN (copy);
3925
3926   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3927      substitute into it.  */
3928   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3929   if (! cprop_jump (jump, copy, reg_used, src))
3930     return 0;
3931
3932   /* If we succeeded, delete the cc0 setter.  */
3933   PUT_CODE (insn, NOTE);
3934   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3935   NOTE_SOURCE_FILE (insn) = 0;
3936   return 1;
3937  }
3938 #endif
3939
3940 /* Perform constant and copy propagation on INSN.
3941    The result is non-zero if a change was made.  */
3942
3943 static int
3944 cprop_insn (insn, alter_jumps)
3945      rtx insn;
3946      int alter_jumps;
3947 {
3948   struct reg_use *reg_used;
3949   int changed = 0;
3950   rtx note;
3951
3952   /* Only propagate into SETs.  Note that a conditional jump is a
3953      SET with pc_rtx as the destination.  */
3954   if ((GET_CODE (insn) != INSN
3955        && GET_CODE (insn) != JUMP_INSN)
3956       || GET_CODE (PATTERN (insn)) != SET)
3957     return 0;
3958
3959   reg_use_count = 0;
3960   find_used_regs (PATTERN (insn));
3961
3962   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3963   if (!note)
3964     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3965
3966   /* We may win even when propagating constants into notes. */
3967   if (note)
3968     find_used_regs (XEXP (note, 0));
3969
3970   reg_used = &reg_use_table[0];
3971   for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3972     {
3973       rtx pat, src;
3974       struct expr *set;
3975       int regno = REGNO (reg_used->reg_rtx);
3976
3977       /* Ignore registers created by GCSE.
3978          We do this because ... */
3979       if (regno >= max_gcse_regno)
3980         continue;
3981
3982       /* If the register has already been set in this block, there's
3983          nothing we can do.  */
3984       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3985         continue;
3986
3987       /* Find an assignment that sets reg_used and is available
3988          at the start of the block.  */
3989       set = find_avail_set (regno, insn);
3990       if (! set)
3991         continue;
3992
3993       pat = set->expr;
3994       /* ??? We might be able to handle PARALLELs.  Later.  */
3995       if (GET_CODE (pat) != SET)
3996         abort ();
3997       src = SET_SRC (pat);
3998
3999       /* Constant propagation.  */
4000       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
4001           || GET_CODE (src) == SYMBOL_REF)
4002         {
4003           /* Handle normal insns first.  */
4004           if (GET_CODE (insn) == INSN
4005               && try_replace_reg (reg_used->reg_rtx, src, insn))
4006             {
4007               changed = 1;
4008               const_prop_count++;
4009               if (gcse_file != NULL)
4010                 {
4011                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
4012                            regno, INSN_UID (insn));
4013                   print_rtl (gcse_file, src);
4014                   fprintf (gcse_file, "\n");
4015                 }
4016
4017               /* The original insn setting reg_used may or may not now be
4018                  deletable.  We leave the deletion to flow.  */
4019             }
4020
4021           /* Try to propagate a CONST_INT into a conditional jump.
4022              We're pretty specific about what we will handle in this
4023              code, we can extend this as necessary over time.
4024
4025              Right now the insn in question must look like
4026              (set (pc) (if_then_else ...))  */
4027           else if (alter_jumps
4028                    && GET_CODE (insn) == JUMP_INSN
4029                    && condjump_p (insn)
4030                    && ! simplejump_p (insn))
4031             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
4032 #ifdef HAVE_cc0
4033           /* Similar code for machines that use a pair of CC0 setter and
4034              conditional jump insn.  */
4035           else if (alter_jumps
4036                    && GET_CODE (PATTERN (insn)) == SET
4037                    && SET_DEST (PATTERN (insn)) == cc0_rtx
4038                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
4039                    && condjump_p (NEXT_INSN (insn))
4040                    && ! simplejump_p (NEXT_INSN (insn)))
4041             changed |= cprop_cc0_jump (insn, reg_used, src);
4042 #endif
4043         }
4044       else if (GET_CODE (src) == REG
4045                && REGNO (src) >= FIRST_PSEUDO_REGISTER
4046                && REGNO (src) != regno)
4047         {
4048           if (try_replace_reg (reg_used->reg_rtx, src, insn))
4049             {
4050               changed = 1;
4051               copy_prop_count++;
4052               if (gcse_file != NULL)
4053                 {
4054                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
4055                            regno, INSN_UID (insn), REGNO (src));
4056                 }
4057
4058               /* The original insn setting reg_used may or may not now be
4059                  deletable.  We leave the deletion to flow.  */
4060               /* FIXME: If it turns out that the insn isn't deletable,
4061                  then we may have unnecessarily extended register lifetimes
4062                  and made things worse.  */
4063             }
4064         }
4065     }
4066
4067   return changed;
4068 }
4069
4070 /* Forward propagate copies.
4071    This includes copies and constants.
4072    Return non-zero if a change was made.  */
4073
4074 static int
4075 cprop (alter_jumps)
4076      int alter_jumps;
4077 {
4078   int bb, changed;
4079   rtx insn;
4080
4081   /* Note we start at block 1.  */
4082
4083   changed = 0;
4084   for (bb = 1; bb < n_basic_blocks; bb++)
4085     {
4086       /* Reset tables used to keep track of what's still valid [since the
4087          start of the block].  */
4088       reset_opr_set_tables ();
4089
4090       for (insn = BLOCK_HEAD (bb);
4091            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4092            insn = NEXT_INSN (insn))
4093         {
4094           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4095             {
4096               changed |= cprop_insn (insn, alter_jumps);
4097
4098               /* Keep track of everything modified by this insn.  */
4099               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4100                  call mark_oprs_set if we turned the insn into a NOTE.  */
4101               if (GET_CODE (insn) != NOTE)
4102                 mark_oprs_set (insn);
4103             }
4104         }
4105     }
4106
4107   if (gcse_file != NULL)
4108     fprintf (gcse_file, "\n");
4109
4110   return changed;
4111 }
4112
4113 /* Perform one copy/constant propagation pass.
4114    F is the first insn in the function.
4115    PASS is the pass count.  */
4116
4117 static int
4118 one_cprop_pass (pass, alter_jumps)
4119      int pass;
4120      int alter_jumps;
4121 {
4122   int changed = 0;
4123
4124   const_prop_count = 0;
4125   copy_prop_count = 0;
4126
4127   alloc_set_hash_table (max_cuid);
4128   compute_set_hash_table ();
4129   if (gcse_file)
4130     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4131                      n_sets);
4132   if (n_sets > 0)
4133     {
4134       alloc_cprop_mem (n_basic_blocks, n_sets);
4135       compute_cprop_data ();
4136       changed = cprop (alter_jumps);
4137       free_cprop_mem ();
4138     }
4139   free_set_hash_table ();
4140
4141   if (gcse_file)
4142     {
4143       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4144                current_function_name, pass,
4145                bytes_used, const_prop_count, copy_prop_count);
4146       fprintf (gcse_file, "\n");
4147     }
4148
4149   return changed;
4150 }
4151 \f
4152 /* Compute PRE+LCM working variables.  */
4153
4154 /* Local properties of expressions.  */
4155 /* Nonzero for expressions that are transparent in the block.  */
4156 static sbitmap *transp;
4157
4158 /* Nonzero for expressions that are transparent at the end of the block.
4159    This is only zero for expressions killed by abnormal critical edge
4160    created by a calls.  */
4161 static sbitmap *transpout;
4162
4163 /* Nonzero for expressions that are computed (available) in the block.  */
4164 static sbitmap *comp;
4165
4166 /* Nonzero for expressions that are locally anticipatable in the block.  */
4167 static sbitmap *antloc;
4168
4169 /* Nonzero for expressions where this block is an optimal computation
4170    point.  */
4171 static sbitmap *pre_optimal;
4172
4173 /* Nonzero for expressions which are redundant in a particular block.  */
4174 static sbitmap *pre_redundant;
4175
4176 /* Nonzero for expressions which should be inserted on a specific edge.  */
4177 static sbitmap *pre_insert_map;
4178
4179 /* Nonzero for expressions which should be deleted in a specific block.  */
4180 static sbitmap *pre_delete_map;
4181
4182 /* Contains the edge_list returned by pre_edge_lcm.  */
4183 static struct edge_list *edge_list;
4184
4185 static sbitmap *temp_bitmap;
4186
4187 /* Redundant insns.  */
4188 static sbitmap pre_redundant_insns;
4189
4190 /* Allocate vars used for PRE analysis.  */
4191
4192 static void
4193 alloc_pre_mem (n_blocks, n_exprs)
4194      int n_blocks, n_exprs;
4195 {
4196   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4197   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4198   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4199   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4200
4201   pre_optimal = NULL;
4202   pre_redundant = NULL;
4203   pre_insert_map = NULL;
4204   pre_delete_map = NULL;
4205   ae_in = NULL;
4206   ae_out = NULL;
4207   u_bitmap = NULL;
4208   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4209   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4210   /* pre_insert and pre_delete are allocated later.  */
4211 }
4212
4213 /* Free vars used for PRE analysis.  */
4214
4215 static void
4216 free_pre_mem ()
4217 {
4218   free (transp);
4219   free (comp);
4220   free (antloc);
4221   free (temp_bitmap);
4222
4223   if (pre_optimal)
4224     free (pre_optimal);
4225   if (pre_redundant)
4226     free (pre_redundant);
4227   if (pre_insert_map)
4228     free (pre_insert_map);
4229   if (pre_delete_map)
4230     free (pre_delete_map);
4231   if (transpout)
4232     free (transpout);
4233
4234   if (ae_in)
4235     free (ae_in);
4236   if (ae_out)
4237     free (ae_out);
4238   if (ae_kill)
4239     free (ae_kill);
4240   if (u_bitmap)
4241     free (u_bitmap);
4242
4243   transp = comp = antloc = NULL;
4244   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4245   transpout = ae_in = ae_out = ae_kill = NULL;
4246   u_bitmap = NULL;
4247
4248 }
4249
4250 /* Top level routine to do the dataflow analysis needed by PRE.  */
4251
4252 static void
4253 compute_pre_data ()
4254 {
4255   compute_local_properties (transp, comp, antloc, 0);
4256   compute_transpout ();
4257   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4258   compute_ae_kill (comp, ae_kill);
4259   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4260                             ae_kill, &pre_insert_map, &pre_delete_map);
4261 }
4262
4263 \f
4264 /* PRE utilities */
4265
4266 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4267    block BB.
4268
4269    VISITED is a pointer to a working buffer for tracking which BB's have
4270    been visited.  It is NULL for the top-level call.
4271
4272    We treat reaching expressions that go through blocks containing the same
4273    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4274    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4275    2 as not reaching.  The intent is to improve the probability of finding
4276    only one reaching expression and to reduce register lifetimes by picking
4277    the closest such expression.  */
4278
4279 static int
4280 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4281      int occr_bb;
4282      struct expr *expr;
4283      int bb;
4284      char *visited;
4285 {
4286   edge pred;
4287
4288   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4289     {
4290       int pred_bb = pred->src->index;
4291
4292       if (pred->src == ENTRY_BLOCK_PTR
4293           /* Has predecessor has already been visited?  */
4294           || visited[pred_bb])
4295         {
4296           /* Nothing to do.  */
4297         }
4298       /* Does this predecessor generate this expression?  */
4299       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4300         {
4301           /* Is this the occurrence we're looking for?
4302              Note that there's only one generating occurrence per block
4303              so we just need to check the block number.  */
4304           if (occr_bb == pred_bb)
4305             return 1;
4306           visited[pred_bb] = 1;
4307         }
4308       /* Ignore this predecessor if it kills the expression.  */
4309       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4310         visited[pred_bb] = 1;
4311       /* Neither gen nor kill.  */
4312       else
4313         {
4314           visited[pred_bb] = 1;
4315           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4316             return 1;
4317         }
4318     }
4319
4320   /* All paths have been checked.  */
4321   return 0;
4322 }
4323
4324 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4325    memory allocated for that function is returned. */
4326
4327 static int
4328 pre_expr_reaches_here_p (occr_bb, expr, bb)
4329      int occr_bb;
4330      struct expr *expr;
4331      int bb;
4332 {
4333   int rval;
4334   char * visited = (char *) xcalloc (n_basic_blocks, 1);
4335
4336   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4337
4338   free (visited);
4339
4340   return (rval);
4341 }
4342 \f
4343
4344 /* Given an expr, generate RTL which we can insert at the end of a BB,
4345    or on an edge.  Set the block number of any insns generated to
4346    the value of BB.  */
4347
4348 static rtx
4349 process_insert_insn (expr)
4350      struct expr *expr;
4351 {
4352   rtx reg = expr->reaching_reg;
4353   rtx pat, copied_expr;
4354   rtx first_new_insn;
4355
4356   start_sequence ();
4357   copied_expr = copy_rtx (expr->expr);
4358   emit_move_insn (reg, copied_expr);
4359   first_new_insn = get_insns ();
4360   pat = gen_sequence ();
4361   end_sequence ();
4362
4363   return pat;
4364 }
4365
4366 /* Add EXPR to the end of basic block BB.
4367
4368    This is used by both the PRE and code hoisting.
4369
4370    For PRE, we want to verify that the expr is either transparent
4371    or locally anticipatable in the target block.  This check makes
4372    no sense for code hoisting.  */
4373
4374 static void
4375 insert_insn_end_bb (expr, bb, pre)
4376      struct expr *expr;
4377      int bb;
4378      int pre;
4379 {
4380   rtx insn = BLOCK_END (bb);
4381   rtx new_insn;
4382   rtx reg = expr->reaching_reg;
4383   int regno = REGNO (reg);
4384   rtx pat;
4385
4386   pat = process_insert_insn (expr);
4387
4388   /* If the last insn is a jump, insert EXPR in front [taking care to
4389      handle cc0, etc. properly].  */
4390
4391   if (GET_CODE (insn) == JUMP_INSN)
4392     {
4393 #ifdef HAVE_cc0
4394       rtx note;
4395 #endif
4396
4397       /* If this is a jump table, then we can't insert stuff here.  Since
4398          we know the previous real insn must be the tablejump, we insert
4399          the new instruction just before the tablejump.  */
4400       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4401           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4402         insn = prev_real_insn (insn);
4403
4404 #ifdef HAVE_cc0
4405       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4406          if cc0 isn't set.  */
4407       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4408       if (note)
4409         insn = XEXP (note, 0);
4410       else
4411         {
4412           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4413           if (maybe_cc0_setter
4414               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4415               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4416             insn = maybe_cc0_setter;
4417         }
4418 #endif
4419       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4420       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4421     }
4422   /* Likewise if the last insn is a call, as will happen in the presence
4423      of exception handling.  */
4424   else if (GET_CODE (insn) == CALL_INSN)
4425     {
4426       HARD_REG_SET parm_regs;
4427       int nparm_regs;
4428       rtx p;
4429
4430       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4431          we search backward and place the instructions before the first
4432          parameter is loaded.  Do this for everyone for consistency and a
4433          presumtion that we'll get better code elsewhere as well.  */
4434
4435       /* It should always be the case that we can put these instructions
4436          anywhere in the basic block with performing PRE optimizations.
4437          Check this.  */
4438       if (pre
4439           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4440           && !TEST_BIT (transp[bb], expr->bitmap_index))
4441         abort ();
4442
4443       /* Since different machines initialize their parameter registers
4444          in different orders, assume nothing.  Collect the set of all
4445          parameter registers.  */
4446       CLEAR_HARD_REG_SET (parm_regs);
4447       nparm_regs = 0;
4448       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4449         if (GET_CODE (XEXP (p, 0)) == USE
4450             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4451           {
4452             int regno = REGNO (XEXP (XEXP (p, 0), 0));
4453             if (regno >= FIRST_PSEUDO_REGISTER)
4454               abort ();
4455             SET_HARD_REG_BIT (parm_regs, regno);
4456             nparm_regs++;
4457           }
4458
4459       /* Search backward for the first set of a register in this set.  */
4460       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4461         {
4462           insn = PREV_INSN (insn);
4463           p = single_set (insn);
4464           if (p && GET_CODE (SET_DEST (p)) == REG
4465               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4466               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4467             {
4468               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4469               nparm_regs--;
4470             }
4471         }
4472
4473       /* If we found all the parameter loads, then we want to insert
4474          before the first parameter load.
4475
4476          If we did not find all the parameter loads, then we might have
4477          stopped on the head of the block, which could be a CODE_LABEL.
4478          If we inserted before the CODE_LABEL, then we would be putting
4479          the insn in the wrong basic block.  In that case, put the insn
4480          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4481       if (GET_CODE (insn) == CODE_LABEL)
4482         insn = NEXT_INSN (insn);
4483       if (GET_CODE (insn) == NOTE
4484           && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4485         insn = NEXT_INSN (insn);
4486       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4487     }
4488   else
4489     {
4490       new_insn = emit_insn_after (pat, insn);
4491       BLOCK_END (bb) = new_insn;
4492     }
4493
4494   /* Keep block number table up to date.
4495      Note, PAT could be a multiple insn sequence, we have to make
4496      sure that each insn in the sequence is handled.  */
4497   if (GET_CODE (pat) == SEQUENCE)
4498     {
4499       int i;
4500
4501       for (i = 0; i < XVECLEN (pat, 0); i++)
4502         {
4503           rtx insn = XVECEXP (pat, 0, i);
4504           set_block_num (insn, bb);
4505           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4506             add_label_notes (PATTERN (insn), new_insn);
4507           note_stores (PATTERN (insn), record_set_info, insn);
4508         }
4509     }
4510   else
4511     {
4512       add_label_notes (SET_SRC (pat), new_insn);
4513       set_block_num (new_insn, bb);
4514       /* Keep register set table up to date.  */
4515       record_one_set (regno, new_insn);
4516     }
4517
4518   gcse_create_count++;
4519
4520   if (gcse_file)
4521     {
4522       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4523                bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4524     }
4525 }
4526
4527 /* Insert partially redundant expressions on edges in the CFG to make
4528    the expressions fully redundant.  */
4529
4530 static int
4531 pre_edge_insert (edge_list, index_map)
4532      struct edge_list *edge_list;
4533      struct expr **index_map;
4534 {
4535   int e, i, num_edges, set_size, did_insert = 0;
4536   sbitmap *inserted;
4537
4538   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4539      if it reaches any of the deleted expressions.  */
4540
4541   set_size = pre_insert_map[0]->size;
4542   num_edges = NUM_EDGES (edge_list);
4543   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4544   sbitmap_vector_zero (inserted, num_edges);
4545
4546   for (e = 0; e < num_edges; e++)
4547     {
4548       int indx;
4549       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4550       int bb = pred->index;
4551
4552       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4553         {
4554           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4555           int j;
4556
4557           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4558             {
4559               if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4560                 {
4561                   struct expr *expr = index_map[j];
4562                   struct occr *occr;
4563
4564                   /* Now look at each deleted occurence of this expression.  */
4565                   for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4566                     {
4567                       if (! occr->deleted_p)
4568                         continue;
4569
4570                       /* Insert this expression on this edge if if it would
4571                          reach the deleted occurence in BB.  */
4572                       if (!TEST_BIT (inserted[e], j))
4573                         {
4574                           rtx insn;
4575                           edge eg = INDEX_EDGE (edge_list, e);
4576                           /* We can't insert anything on an abnormal
4577                              and critical edge, so we insert the
4578                              insn at the end of the previous block. There
4579                              are several alternatives detailed in
4580                              Morgans book P277 (sec 10.5) for handling
4581                              this situation.  This one is easiest for now.  */
4582
4583                           if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4584                             {
4585                               insert_insn_end_bb (index_map[j], bb, 0);
4586                             }
4587                           else
4588                             {
4589                               insn = process_insert_insn (index_map[j]);
4590                               insert_insn_on_edge (insn, eg);
4591                             }
4592                           if (gcse_file)
4593                             {
4594                               fprintf (gcse_file,
4595                                        "PRE/HOIST: edge (%d,%d), copy expression %d\n",
4596                                         bb,
4597                                         INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index);
4598                             }
4599                           SET_BIT (inserted[e], j);
4600                           did_insert = 1;
4601                           gcse_create_count++;
4602                         }
4603                     }
4604                 }
4605             }
4606         }
4607     }
4608
4609   /* Clean up.  */
4610   free (inserted);
4611
4612   return did_insert;
4613 }
4614
4615 /* Copy the result of INSN to REG.
4616    INDX is the expression number.  */
4617
4618 static void
4619 pre_insert_copy_insn (expr, insn)
4620      struct expr *expr;
4621      rtx insn;
4622 {
4623   rtx reg = expr->reaching_reg;
4624   int regno = REGNO (reg);
4625   int indx = expr->bitmap_index;
4626   rtx set = single_set (insn);
4627   rtx new_insn;
4628   int bb = BLOCK_NUM (insn);
4629
4630   if (!set)
4631     abort ();
4632   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4633                               insn);
4634   /* Keep block number table up to date.  */
4635   set_block_num (new_insn, bb);
4636   /* Keep register set table up to date.  */
4637   record_one_set (regno, new_insn);
4638   if (insn == BLOCK_END (bb))
4639     BLOCK_END (bb) = new_insn;
4640
4641   gcse_create_count++;
4642
4643   if (gcse_file)
4644     fprintf (gcse_file,
4645              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4646               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4647               INSN_UID (insn), regno);
4648 }
4649
4650 /* Copy available expressions that reach the redundant expression
4651    to `reaching_reg'.  */
4652
4653 static void
4654 pre_insert_copies ()
4655 {
4656   int i;
4657
4658   /* For each available expression in the table, copy the result to
4659      `reaching_reg' if the expression reaches a deleted one.
4660
4661      ??? The current algorithm is rather brute force.
4662      Need to do some profiling.  */
4663
4664   for (i = 0; i < expr_hash_table_size; i++)
4665     {
4666       struct expr *expr;
4667
4668       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4669         {
4670           struct occr *occr;
4671
4672           /* If the basic block isn't reachable, PPOUT will be TRUE.
4673              However, we don't want to insert a copy here because the
4674              expression may not really be redundant.  So only insert
4675              an insn if the expression was deleted.
4676              This test also avoids further processing if the expression
4677              wasn't deleted anywhere.  */
4678           if (expr->reaching_reg == NULL)
4679             continue;
4680
4681           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4682             {
4683               struct occr *avail;
4684
4685               if (! occr->deleted_p)
4686                 continue;
4687
4688               for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4689                 {
4690                   rtx insn = avail->insn;
4691
4692                   /* No need to handle this one if handled already.  */
4693                   if (avail->copied_p)
4694                     continue;
4695                   /* Don't handle this one if it's a redundant one.  */
4696                   if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4697                     continue;
4698                   /* Or if the expression doesn't reach the deleted one.  */
4699                   if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4700                                                  BLOCK_NUM (occr->insn)))
4701                     continue;
4702
4703                   /* Copy the result of avail to reaching_reg.  */
4704                   pre_insert_copy_insn (expr, insn);
4705                   avail->copied_p = 1;
4706                 }
4707             }
4708         }
4709     }
4710 }
4711
4712 /* Delete redundant computations.
4713    Deletion is done by changing the insn to copy the `reaching_reg' of
4714    the expression into the result of the SET.  It is left to later passes
4715    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4716
4717    Returns non-zero if a change is made.  */
4718
4719 static int
4720 pre_delete ()
4721 {
4722   int i, bb, changed;
4723
4724   /* Compute the expressions which are redundant and need to be replaced by
4725      copies from the reaching reg to the target reg.  */
4726   for (bb = 0; bb < n_basic_blocks; bb++)
4727     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4728
4729   changed = 0;
4730   for (i = 0; i < expr_hash_table_size; i++)
4731     {
4732       struct expr *expr;
4733
4734       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4735         {
4736           struct occr *occr;
4737           int indx = expr->bitmap_index;
4738
4739           /* We only need to search antic_occr since we require
4740              ANTLOC != 0.  */
4741
4742           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4743             {
4744               rtx insn = occr->insn;
4745               rtx set;
4746               int bb = BLOCK_NUM (insn);
4747
4748               if (TEST_BIT (temp_bitmap[bb], indx))
4749                 {
4750                   set = single_set (insn);
4751                   if (! set)
4752                     abort ();
4753
4754                   /* Create a pseudo-reg to store the result of reaching
4755                      expressions into.  Get the mode for the new pseudo
4756                      from the mode of the original destination pseudo.  */
4757                   if (expr->reaching_reg == NULL)
4758                     expr->reaching_reg
4759                       = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4760
4761                   /* In theory this should never fail since we're creating
4762                      a reg->reg copy.
4763
4764                      However, on the x86 some of the movXX patterns actually
4765                      contain clobbers of scratch regs.  This may cause the
4766                      insn created by validate_change to not match any pattern
4767                      and thus cause validate_change to fail.   */
4768                   if (validate_change (insn, &SET_SRC (set),
4769                                        expr->reaching_reg, 0))
4770                     {
4771                       occr->deleted_p = 1;
4772                       SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4773                       changed = 1;
4774                       gcse_subst_count++;
4775                     }
4776
4777                   if (gcse_file)
4778                     {
4779                       fprintf (gcse_file,
4780                                "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4781                                INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4782                     }
4783                 }
4784             }
4785         }
4786     }
4787
4788   return changed;
4789 }
4790
4791 /* Perform GCSE optimizations using PRE.
4792    This is called by one_pre_gcse_pass after all the dataflow analysis
4793    has been done.
4794
4795    This is based on the original Morel-Renvoise paper Fred Chow's thesis,
4796    and lazy code motion from Knoop, Ruthing and Steffen as described in
4797    Advanced Compiler Design and Implementation.
4798
4799    ??? A new pseudo reg is created to hold the reaching expression.
4800    The nice thing about the classical approach is that it would try to
4801    use an existing reg.  If the register can't be adequately optimized
4802    [i.e. we introduce reload problems], one could add a pass here to
4803    propagate the new register through the block.
4804
4805    ??? We don't handle single sets in PARALLELs because we're [currently]
4806    not able to copy the rest of the parallel when we insert copies to create
4807    full redundancies from partial redundancies.  However, there's no reason
4808    why we can't handle PARALLELs in the cases where there are no partial
4809    redundancies.  */
4810
4811 static int
4812 pre_gcse ()
4813 {
4814   int i, did_insert;
4815   int changed;
4816   struct expr **index_map;
4817
4818   /* Compute a mapping from expression number (`bitmap_index') to
4819      hash table entry.  */
4820
4821   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4822   for (i = 0; i < expr_hash_table_size; i++)
4823     {
4824       struct expr *expr;
4825
4826       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4827         index_map[expr->bitmap_index] = expr;
4828     }
4829
4830   /* Reset bitmap used to track which insns are redundant.  */
4831   pre_redundant_insns = sbitmap_alloc (max_cuid);
4832   sbitmap_zero (pre_redundant_insns);
4833
4834   /* Delete the redundant insns first so that
4835      - we know what register to use for the new insns and for the other
4836        ones with reaching expressions
4837      - we know which insns are redundant when we go to create copies  */
4838   changed = pre_delete ();
4839
4840   did_insert = pre_edge_insert (edge_list, index_map);
4841   /* In other places with reaching expressions, copy the expression to the
4842      specially allocated pseudo-reg that reaches the redundant expr.  */
4843   pre_insert_copies ();
4844   if (did_insert)
4845     {
4846       commit_edge_insertions ();
4847       changed = 1;
4848     }
4849
4850   free (index_map);
4851   free (pre_redundant_insns);
4852
4853   return changed;
4854 }
4855
4856 /* Top level routine to perform one PRE GCSE pass.
4857
4858    Return non-zero if a change was made.  */
4859
4860 static int
4861 one_pre_gcse_pass (pass)
4862      int pass;
4863 {
4864   int changed = 0;
4865
4866   gcse_subst_count = 0;
4867   gcse_create_count = 0;
4868
4869   alloc_expr_hash_table (max_cuid);
4870   add_noreturn_fake_exit_edges ();
4871   compute_expr_hash_table ();
4872   if (gcse_file)
4873     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4874                      expr_hash_table_size, n_exprs);
4875   if (n_exprs > 0)
4876     {
4877       alloc_pre_mem (n_basic_blocks, n_exprs);
4878       compute_pre_data ();
4879       changed |= pre_gcse ();
4880       free_edge_list (edge_list);
4881       free_pre_mem ();
4882     }
4883   remove_fake_edges ();
4884   free_expr_hash_table ();
4885
4886   if (gcse_file)
4887     {
4888       fprintf (gcse_file, "\n");
4889       fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4890                current_function_name, pass,
4891                bytes_used, gcse_subst_count, gcse_create_count);
4892     }
4893
4894   return changed;
4895 }
4896 \f
4897 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4898    We have to add REG_LABEL notes, because the following loop optimization
4899    pass requires them.  */
4900
4901 /* ??? This is very similar to the loop.c add_label_notes function.  We
4902    could probably share code here.  */
4903
4904 /* ??? If there was a jump optimization pass after gcse and before loop,
4905    then we would not need to do this here, because jump would add the
4906    necessary REG_LABEL notes.  */
4907
4908 static void
4909 add_label_notes (x, insn)
4910      rtx x;
4911      rtx insn;
4912 {
4913   enum rtx_code code = GET_CODE (x);
4914   int i, j;
4915   const char *fmt;
4916
4917   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4918     {
4919       /* This code used to ignore labels that referred to dispatch tables to
4920          avoid flow generating (slighly) worse code.
4921
4922          We no longer ignore such label references (see LABEL_REF handling in
4923          mark_jump_label for additional information).  */
4924       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4925                                             REG_NOTES (insn));
4926       return;
4927     }
4928
4929   fmt = GET_RTX_FORMAT (code);
4930   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4931     {
4932       if (fmt[i] == 'e')
4933         add_label_notes (XEXP (x, i), insn);
4934       else if (fmt[i] == 'E')
4935         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4936           add_label_notes (XVECEXP (x, i, j), insn);
4937     }
4938 }
4939
4940 /* Compute transparent outgoing information for each block.
4941
4942    An expression is transparent to an edge unless it is killed by
4943    the edge itself.  This can only happen with abnormal control flow,
4944    when the edge is traversed through a call.  This happens with
4945    non-local labels and exceptions.
4946
4947    This would not be necessary if we split the edge.  While this is
4948    normally impossible for abnormal critical edges, with some effort
4949    it should be possible with exception handling, since we still have
4950    control over which handler should be invoked.  But due to increased
4951    EH table sizes, this may not be worthwhile.  */
4952
4953 static void
4954 compute_transpout ()
4955 {
4956   int bb;
4957
4958   sbitmap_vector_ones (transpout, n_basic_blocks);
4959
4960   for (bb = 0; bb < n_basic_blocks; ++bb)
4961     {
4962       int i;
4963
4964       /* Note that flow inserted a nop a the end of basic blocks that
4965          end in call instructions for reasons other than abnormal
4966          control flow.  */
4967       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4968         continue;
4969
4970       for (i = 0; i < expr_hash_table_size; i++)
4971         {
4972           struct expr *expr;
4973           for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4974             if (GET_CODE (expr->expr) == MEM)
4975               {
4976                 rtx addr = XEXP (expr->expr, 0);
4977
4978                 if (GET_CODE (addr) == SYMBOL_REF
4979                     && CONSTANT_POOL_ADDRESS_P (addr))
4980                   continue;
4981
4982                 /* ??? Optimally, we would use interprocedural alias
4983                    analysis to determine if this mem is actually killed
4984                    by this call.  */
4985                 RESET_BIT (transpout[bb], expr->bitmap_index);
4986               }
4987         }
4988     }
4989 }
4990
4991 /* Removal of useless null pointer checks */
4992
4993 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4994    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4995    `null_pointer_info *'.
4996
4997    We ignore hard registers.  */
4998 static void
4999 invalidate_nonnull_info (x, setter, data)
5000      rtx x;
5001      rtx setter ATTRIBUTE_UNUSED;
5002      void *data;
5003 {
5004   int offset, regno;
5005   struct null_pointer_info* npi = (struct null_pointer_info *) data;
5006
5007   offset = 0;
5008   while (GET_CODE (x) == SUBREG)
5009     x = SUBREG_REG (x);
5010
5011   /* Ignore anything that is not a register or is a hard register.  */
5012   if (GET_CODE (x) != REG
5013       || REGNO (x) < npi->min_reg
5014       || REGNO (x) >= npi->max_reg)
5015     return;
5016
5017   regno = REGNO (x) - npi->min_reg;
5018
5019   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
5020   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
5021 }
5022
5023 /* Do null-pointer check elimination for the registers indicated in
5024    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5025    they are not our responsibility to free.  */
5026
5027 static void
5028 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
5029      int *block_reg;
5030      sbitmap *nonnull_avin;
5031      sbitmap *nonnull_avout;
5032      struct null_pointer_info *npi;
5033 {
5034   int bb;
5035   int current_block;
5036   sbitmap *nonnull_local = npi->nonnull_local;
5037   sbitmap *nonnull_killed = npi->nonnull_killed;
5038
5039   /* Compute local properties, nonnull and killed.  A register will have
5040      the nonnull property if at the end of the current block its value is
5041      known to be nonnull.  The killed property indicates that somewhere in
5042      the block any information we had about the register is killed.
5043
5044      Note that a register can have both properties in a single block.  That
5045      indicates that it's killed, then later in the block a new value is
5046      computed.  */
5047   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5048   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5049   for (current_block = 0; current_block < n_basic_blocks; current_block++)
5050     {
5051       rtx insn, stop_insn;
5052
5053       /* Set the current block for invalidate_nonnull_info.  */
5054       npi->current_block = current_block;
5055
5056       /* Scan each insn in the basic block looking for memory references and
5057          register sets.  */
5058       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5059       for (insn = BLOCK_HEAD (current_block);
5060            insn != stop_insn;
5061            insn = NEXT_INSN (insn))
5062         {
5063           rtx set;
5064           rtx reg;
5065
5066           /* Ignore anything that is not a normal insn.  */
5067           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5068             continue;
5069
5070           /* Basically ignore anything that is not a simple SET.  We do have
5071              to make sure to invalidate nonnull_local and set nonnull_killed
5072              for such insns though.  */
5073           set = single_set (insn);
5074           if (!set)
5075             {
5076               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5077               continue;
5078             }
5079
5080           /* See if we've got a useable memory load.  We handle it first
5081              in case it uses its address register as a dest (which kills
5082              the nonnull property).  */
5083           if (GET_CODE (SET_SRC (set)) == MEM
5084               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5085               && REGNO (reg) >= npi->min_reg
5086               && REGNO (reg) < npi->max_reg)
5087             SET_BIT (nonnull_local[current_block],
5088                      REGNO (reg) - npi->min_reg);
5089
5090           /* Now invalidate stuff clobbered by this insn.  */
5091           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5092
5093           /* And handle stores, we do these last since any sets in INSN can
5094              not kill the nonnull property if it is derived from a MEM
5095              appearing in a SET_DEST.  */
5096           if (GET_CODE (SET_DEST (set)) == MEM
5097               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5098               && REGNO (reg) >= npi->min_reg
5099               && REGNO (reg) < npi->max_reg)
5100             SET_BIT (nonnull_local[current_block],
5101                      REGNO (reg) - npi->min_reg);
5102         }
5103     }
5104
5105   /* Now compute global properties based on the local properties.   This
5106      is a classic global availablity algorithm.  */
5107   compute_available (nonnull_local, nonnull_killed,
5108                      nonnull_avout, nonnull_avin);
5109
5110   /* Now look at each bb and see if it ends with a compare of a value
5111      against zero.  */
5112   for (bb = 0; bb < n_basic_blocks; bb++)
5113     {
5114       rtx last_insn = BLOCK_END (bb);
5115       rtx condition, earliest;
5116       int compare_and_branch;
5117
5118       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5119          since BLOCK_REG[BB] is zero if this block did not end with a
5120          comparison against zero, this condition works.  */
5121       if (block_reg[bb] < npi->min_reg
5122           || block_reg[bb] >= npi->max_reg)
5123         continue;
5124
5125       /* LAST_INSN is a conditional jump.  Get its condition.  */
5126       condition = get_condition (last_insn, &earliest);
5127
5128       /* If we can't determine the condition then skip.  */
5129       if (! condition)
5130         continue;
5131
5132       /* Is the register known to have a nonzero value?  */
5133       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5134         continue;
5135
5136       /* Try to compute whether the compare/branch at the loop end is one or
5137          two instructions.  */
5138       if (earliest == last_insn)
5139         compare_and_branch = 1;
5140       else if (earliest == prev_nonnote_insn (last_insn))
5141         compare_and_branch = 2;
5142       else
5143         continue;
5144
5145       /* We know the register in this comparison is nonnull at exit from
5146          this block.  We can optimize this comparison.  */
5147       if (GET_CODE (condition) == NE)
5148         {
5149           rtx new_jump;
5150
5151           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5152                                             last_insn);
5153           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5154           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5155           emit_barrier_after (new_jump);
5156         }
5157       delete_insn (last_insn);
5158       if (compare_and_branch == 2)
5159         delete_insn (earliest);
5160
5161       /* Don't check this block again.  (Note that BLOCK_END is
5162          invalid here; we deleted the last instruction in the
5163          block.)  */
5164       block_reg[bb] = 0;
5165     }
5166 }
5167
5168 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5169    at compile time.
5170
5171    This is conceptually similar to global constant/copy propagation and
5172    classic global CSE (it even uses the same dataflow equations as cprop).
5173
5174    If a register is used as memory address with the form (mem (reg)), then we
5175    know that REG can not be zero at that point in the program.  Any instruction
5176    which sets REG "kills" this property.
5177
5178    So, if every path leading to a conditional branch has an available memory
5179    reference of that form, then we know the register can not have the value
5180    zero at the conditional branch.
5181
5182    So we merely need to compute the local properies and propagate that data
5183    around the cfg, then optimize where possible.
5184
5185    We run this pass two times.  Once before CSE, then again after CSE.  This
5186    has proven to be the most profitable approach.  It is rare for new
5187    optimization opportunities of this nature to appear after the first CSE
5188    pass.
5189
5190    This could probably be integrated with global cprop with a little work.  */
5191
5192 void
5193 delete_null_pointer_checks (f)
5194      rtx f;
5195 {
5196   sbitmap *nonnull_avin, *nonnull_avout;
5197   int *block_reg;
5198   int bb;
5199   int reg;
5200   int regs_per_pass;
5201   int max_reg;
5202   struct null_pointer_info npi;
5203
5204   /* First break the program into basic blocks.  */
5205   find_basic_blocks (f, max_reg_num (), NULL);
5206   cleanup_cfg (f);
5207
5208   /* If we have only a single block, then there's nothing to do.  */
5209   if (n_basic_blocks <= 1)
5210     {
5211       /* Free storage allocated by find_basic_blocks.  */
5212       free_basic_block_vars (0);
5213       return;
5214     }
5215
5216   /* Trying to perform global optimizations on flow graphs which have
5217      a high connectivity will take a long time and is unlikely to be
5218      particularly useful.
5219
5220      In normal circumstances a cfg should have about twice has many edges
5221      as blocks.  But we do not want to punish small functions which have
5222      a couple switch statements.  So we require a relatively large number
5223      of basic blocks and the ratio of edges to blocks to be high.  */
5224   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5225     {
5226       /* Free storage allocated by find_basic_blocks.  */
5227       free_basic_block_vars (0);
5228       return;
5229     }
5230
5231   /* We need four bitmaps, each with a bit for each register in each
5232      basic block.  */
5233   max_reg = max_reg_num ();
5234   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5235
5236   /* Allocate bitmaps to hold local and global properties.  */
5237   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5238   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5239   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5240   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5241
5242   /* Go through the basic blocks, seeing whether or not each block
5243      ends with a conditional branch whose condition is a comparison
5244      against zero.  Record the register compared in BLOCK_REG.  */
5245   block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5246   for (bb = 0; bb < n_basic_blocks; bb++)
5247     {
5248       rtx last_insn = BLOCK_END (bb);
5249       rtx condition, earliest, reg;
5250
5251       /* We only want conditional branches.  */
5252       if (GET_CODE (last_insn) != JUMP_INSN
5253           || !condjump_p (last_insn)
5254           || simplejump_p (last_insn))
5255         continue;
5256
5257       /* LAST_INSN is a conditional jump.  Get its condition.  */
5258       condition = get_condition (last_insn, &earliest);
5259
5260       /* If we were unable to get the condition, or it is not a equality
5261          comparison against zero then there's nothing we can do.  */
5262       if (!condition
5263           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5264           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5265           || (XEXP (condition, 1)
5266               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5267         continue;
5268
5269       /* We must be checking a register against zero.  */
5270       reg = XEXP (condition, 0);
5271       if (GET_CODE (reg) != REG)
5272         continue;
5273
5274       block_reg[bb] = REGNO (reg);
5275     }
5276
5277   /* Go through the algorithm for each block of registers.  */
5278   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5279     {
5280       npi.min_reg = reg;
5281       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5282       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5283                                     nonnull_avout, &npi);
5284     }
5285
5286   /* Free storage allocated by find_basic_blocks.  */
5287   free_basic_block_vars (0);
5288
5289   /* Free the table of registers compared at the end of every block.  */
5290   free (block_reg);
5291
5292   /* Free bitmaps.  */
5293   free (npi.nonnull_local);
5294   free (npi.nonnull_killed);
5295   free (nonnull_avin);
5296   free (nonnull_avout);
5297 }
5298
5299 /* Code Hoisting variables and subroutines.  */
5300
5301 /* Very busy expressions.  */
5302 static sbitmap *hoist_vbein;
5303 static sbitmap *hoist_vbeout;
5304
5305 /* Hoistable expressions.  */
5306 static sbitmap *hoist_exprs;
5307
5308 /* Dominator bitmaps.  */
5309 static sbitmap *dominators;
5310
5311 /* ??? We could compute post dominators and run this algorithm in
5312    reverse to to perform tail merging, doing so would probably be
5313    more effective than the tail merging code in jump.c.
5314
5315    It's unclear if tail merging could be run in parallel with
5316    code hoisting.  It would be nice.  */
5317
5318 /* Allocate vars used for code hoisting analysis.  */
5319
5320 static void
5321 alloc_code_hoist_mem (n_blocks, n_exprs)
5322      int n_blocks, n_exprs;
5323 {
5324   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5325   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5326   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5327
5328   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5329   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5330   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5331   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5332
5333   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5334 }
5335
5336 /* Free vars used for code hoisting analysis.  */
5337
5338 static void
5339 free_code_hoist_mem ()
5340 {
5341   free (antloc);
5342   free (transp);
5343   free (comp);
5344
5345   free (hoist_vbein);
5346   free (hoist_vbeout);
5347   free (hoist_exprs);
5348   free (transpout);
5349
5350   free (dominators);
5351 }
5352
5353 /* Compute the very busy expressions at entry/exit from each block.
5354
5355    An expression is very busy if all paths from a given point
5356    compute the expression.  */
5357
5358 static void
5359 compute_code_hoist_vbeinout ()
5360 {
5361   int bb, changed, passes;
5362
5363   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5364   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5365
5366   passes = 0;
5367   changed = 1;
5368   while (changed)
5369     {
5370       changed = 0;
5371       /* We scan the blocks in the reverse order to speed up
5372          the convergence.  */
5373       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5374         {
5375           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5376                                            hoist_vbeout[bb], transp[bb]);
5377           if (bb != n_basic_blocks - 1)
5378             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5379         }
5380       passes++;
5381     }
5382
5383   if (gcse_file)
5384     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5385 }
5386
5387 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5388
5389 static void
5390 compute_code_hoist_data ()
5391 {
5392   compute_local_properties (transp, comp, antloc, 0);
5393   compute_transpout ();
5394   compute_code_hoist_vbeinout ();
5395   compute_flow_dominators (dominators, NULL);
5396   if (gcse_file)
5397     fprintf (gcse_file, "\n");
5398 }
5399
5400 /* Determine if the expression identified by EXPR_INDEX would
5401    reach BB unimpared if it was placed at the end of EXPR_BB.
5402
5403    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5404    to me that the expression must either be computed or transparent in
5405    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5406    would allow the expression to be hoisted out of loops, even if
5407    the expression wasn't a loop invariant.
5408
5409    Contrast this to reachability for PRE where an expression is
5410    considered reachable if *any* path reaches instead of *all*
5411    paths.  */
5412
5413 static int
5414 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5415      int expr_bb;
5416      int expr_index;
5417      int bb;
5418      char *visited;
5419 {
5420   edge pred;
5421   int visited_allocated_locally = 0;
5422
5423
5424   if (visited == NULL)
5425     {
5426        visited_allocated_locally = 1;
5427        visited = xcalloc (n_basic_blocks, 1);
5428     }
5429
5430   visited[expr_bb] = 1;
5431   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5432     {
5433       int pred_bb = pred->src->index;
5434
5435       if (pred->src == ENTRY_BLOCK_PTR)
5436         break;
5437       else if (visited[pred_bb])
5438         continue;
5439       /* Does this predecessor generate this expression?  */
5440       else if (TEST_BIT (comp[pred_bb], expr_index))
5441         break;
5442       else if (! TEST_BIT (transp[pred_bb], expr_index))
5443         break;
5444       /* Not killed.  */
5445       else
5446         {
5447           visited[pred_bb] = 1;
5448           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5449                                            pred_bb, visited))
5450             break;
5451         }
5452     }
5453   if (visited_allocated_locally)
5454     free (visited);
5455   return (pred == NULL);
5456 }
5457 \f
5458 /* Actually perform code hoisting.  */
5459 static void
5460 hoist_code ()
5461 {
5462   int bb, dominated, i;
5463   struct expr **index_map;
5464
5465   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5466
5467   /* Compute a mapping from expression number (`bitmap_index') to
5468      hash table entry.  */
5469
5470   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5471   for (i = 0; i < expr_hash_table_size; i++)
5472     {
5473       struct expr *expr;
5474
5475       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5476         index_map[expr->bitmap_index] = expr;
5477     }
5478
5479   /* Walk over each basic block looking for potentially hoistable
5480      expressions, nothing gets hoisted from the entry block.  */
5481   for (bb = 0; bb < n_basic_blocks; bb++)
5482     {
5483       int found = 0;
5484       int insn_inserted_p;
5485
5486       /* Examine each expression that is very busy at the exit of this
5487          block.  These are the potentially hoistable expressions.  */
5488       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5489         {
5490           int hoistable = 0;
5491           if (TEST_BIT (hoist_vbeout[bb], i)
5492               && TEST_BIT (transpout[bb], i))
5493             {
5494               /* We've found a potentially hoistable expression, now
5495                  we look at every block BB dominates to see if it
5496                  computes the expression.  */
5497               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5498                 {
5499                   /* Ignore self dominance.  */
5500                   if (bb == dominated
5501                       || ! TEST_BIT (dominators[dominated], bb))
5502                     continue;
5503
5504                   /* We've found a dominated block, now see if it computes
5505                      the busy expression and whether or not moving that
5506                      expression to the "beginning" of that block is safe.  */
5507                   if (!TEST_BIT (antloc[dominated], i))
5508                     continue;
5509
5510                   /* Note if the expression would reach the dominated block
5511                      unimpared if it was placed at the end of BB.
5512
5513                      Keep track of how many times this expression is hoistable
5514                      from a dominated block into BB.  */
5515                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5516                     hoistable++;
5517                 }
5518
5519               /* If we found more than one hoistable occurence of this
5520                  expression, then note it in the bitmap of expressions to
5521                  hoist.  It makes no sense to hoist things which are computed
5522                  in only one BB, and doing so tends to pessimize register
5523                  allocation.  One could increase this value to try harder
5524                  to avoid any possible code expansion due to register
5525                  allocation issues; however experiments have shown that
5526                  the vast majority of hoistable expressions are only movable
5527                  from two successors, so raising this threshhold is likely
5528                  to nullify any benefit we get from code hoisting.  */
5529               if (hoistable > 1)
5530                 {
5531                   SET_BIT (hoist_exprs[bb], i);
5532                   found = 1;
5533                 }
5534             }
5535         }
5536
5537       /* If we found nothing to hoist, then quit now.  */
5538       if (! found)
5539         continue;
5540
5541       /* Loop over all the hoistable expressions.  */
5542       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5543         {
5544           /* We want to insert the expression into BB only once, so
5545              note when we've inserted it.  */
5546           insn_inserted_p = 0;
5547
5548           /* These tests should be the same as the tests above.  */
5549           if (TEST_BIT (hoist_vbeout[bb], i))
5550             {
5551               /* We've found a potentially hoistable expression, now
5552                  we look at every block BB dominates to see if it
5553                  computes the expression.  */
5554               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5555                 {
5556                   /* Ignore self dominance.  */
5557                   if (bb == dominated
5558                       || ! TEST_BIT (dominators[dominated], bb))
5559                     continue;
5560
5561                   /* We've found a dominated block, now see if it computes
5562                      the busy expression and whether or not moving that
5563                      expression to the "beginning" of that block is safe.  */
5564                   if (!TEST_BIT (antloc[dominated], i))
5565                     continue;
5566
5567                   /* The expression is computed in the dominated block and
5568                      it would be safe to compute it at the start of the
5569                      dominated block.  Now we have to determine if the
5570                      expresion would reach the dominated block if it was
5571                      placed at the end of BB.  */
5572                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5573                     {
5574                       struct expr *expr = index_map[i];
5575                       struct occr *occr = expr->antic_occr;
5576                       rtx insn;
5577                       rtx set;
5578
5579
5580                       /* Find the right occurence of this expression.  */
5581                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5582                         occr = occr->next;
5583
5584                       /* Should never happen.  */
5585                       if (!occr)
5586                         abort ();
5587
5588                       insn = occr->insn;
5589
5590                       set = single_set (insn);
5591                       if (! set)
5592                         abort ();
5593
5594                       /* Create a pseudo-reg to store the result of reaching
5595                          expressions into.  Get the mode for the new pseudo
5596                          from the mode of the original destination pseudo.  */
5597                       if (expr->reaching_reg == NULL)
5598                         expr->reaching_reg
5599                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5600
5601                       /* In theory this should never fail since we're creating
5602                          a reg->reg copy.
5603
5604                          However, on the x86 some of the movXX patterns actually
5605                          contain clobbers of scratch regs.  This may cause the
5606                          insn created by validate_change to not match any
5607                          pattern and thus cause validate_change to fail.   */
5608                       if (validate_change (insn, &SET_SRC (set),
5609                                            expr->reaching_reg, 0))
5610                         {
5611                           occr->deleted_p = 1;
5612                           if (!insn_inserted_p)
5613                             {
5614                               insert_insn_end_bb (index_map[i], bb, 0);
5615                               insn_inserted_p = 1;
5616                             }
5617                         }
5618                     }
5619                 }
5620             }
5621         }
5622     }
5623     free (index_map);
5624 }
5625
5626 /* Top level routine to perform one code hoisting (aka unification) pass
5627
5628    Return non-zero if a change was made.  */
5629
5630 static int
5631 one_code_hoisting_pass ()
5632 {
5633   int changed = 0;
5634
5635   alloc_expr_hash_table (max_cuid);
5636   compute_expr_hash_table ();
5637   if (gcse_file)
5638     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5639                      expr_hash_table_size, n_exprs);
5640   if (n_exprs > 0)
5641     {
5642       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5643       compute_code_hoist_data ();
5644       hoist_code ();
5645       free_code_hoist_mem ();
5646     }
5647   free_expr_hash_table ();
5648
5649   return changed;
5650 }