gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {rtx reg_rtx; };
 311
 312 /* Hash table of expressions.  */
 313
 314 struct expr
 315 {
 316   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 317   rtx expr;
 318   /* Index in the available expression bitmaps.  */
 319   int bitmap_index;
 320   /* Next entry with the same hash.  */
 321   struct expr *next_same_hash;
 322   /* List of anticipatable occurrences in basic blocks in the function.
 323      An "anticipatable occurrence" is one that is the first occurrence in the
 324      basic block, the operands are not modified in the basic block prior
 325      to the occurrence and the output is not used between the start of
 326      the block and the occurrence.  */
 327   struct occr *antic_occr;
 328   /* List of available occurrence in basic blocks in the function.
 329      An "available occurrence" is one that is the last occurrence in the
 330      basic block and the operands are not modified by following statements in
 331      the basic block [including this insn].  */
 332   struct occr *avail_occr;
 333   /* Non-null if the computation is PRE redundant.
 334      The value is the newly created pseudo-reg to record a copy of the
 335      expression in all the places that reach the redundant copy.  */
 336   rtx reaching_reg;
 337 };
 338
 339 /* Occurrence of an expression.
 340    There is one per basic block.  If a pattern appears more than once the
 341    last appearance is used [or first for anticipatable expressions].  */
 342
 343 struct occr
 344 {
 345   /* Next occurrence of this expression.  */
 346   struct occr *next;
 347   /* The insn that computes the expression.  */
 348   rtx insn;
 349   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 350   char deleted_p;
 351   /* Non-zero if this [available] occurrence has been copied to
 352      reaching_reg.  */
 353   /* ??? This is mutually exclusive with deleted_p, so they could share
 354      the same byte.  */
 355   char copied_p;
 356 };
 357
 358 /* Expression and copy propagation hash tables.
 359    Each hash table is an array of buckets.
 360    ??? It is known that if it were an array of entries, structure elements
 361    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 362    not clear whether in the final analysis a sufficient amount of memory would
 363    be saved as the size of the available expression bitmaps would be larger
 364    [one could build a mapping table without holes afterwards though].
 365    Someday I'll perform the computation and figure it out.  */
 366
 367 /* Total size of the expression hash table, in elements.  */
 368 static unsigned int expr_hash_table_size;
 369
 370 /* The table itself.
 371    This is an array of `expr_hash_table_size' elements.  */
 372 static struct expr **expr_hash_table;
 373
 374 /* Total size of the copy propagation hash table, in elements.  */
 375 static unsigned int set_hash_table_size;
 376
 377 /* The table itself.
 378    This is an array of `set_hash_table_size' elements.  */
 379 static struct expr **set_hash_table;
 380
 381 /* Mapping of uids to cuids.
 382    Only real insns get cuids.  */
 383 static int *uid_cuid;
 384
 385 /* Highest UID in UID_CUID.  */
 386 static int max_uid;
 387
 388 /* Get the cuid of an insn.  */
 389 #ifdef ENABLE_CHECKING
 390 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
 391 #else
 392 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 393 #endif
 394
 395 /* Number of cuids.  */
 396 static int max_cuid;
 397
 398 /* Mapping of cuids to insns.  */
 399 static rtx *cuid_insn;
 400
 401 /* Get insn from cuid.  */
 402 #define CUID_INSN(CUID) (cuid_insn[CUID])
 403
 404 /* Maximum register number in function prior to doing gcse + 1.
 405    Registers created during this pass have regno >= max_gcse_regno.
 406    This is named with "gcse" to not collide with global of same name.  */
 407 static unsigned int max_gcse_regno;
 408
 409 /* Maximum number of cse-able expressions found.  */
 410 static int n_exprs;
 411
 412 /* Maximum number of assignments for copy propagation found.  */
 413 static int n_sets;
 414
 415 /* Table of registers that are modified.
 416
 417    For each register, each element is a list of places where the pseudo-reg
 418    is set.
 419
 420    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 421    requires knowledge of which blocks kill which regs [and thus could use
 422    a bitmap instead of the lists `reg_set_table' uses].
 423
 424    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 425    num-regs) [however perhaps it may be useful to keep the data as is].  One
 426    advantage of recording things this way is that `reg_set_table' is fairly
 427    sparse with respect to pseudo regs but for hard regs could be fairly dense
 428    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 429    up functions like compute_transp since in the case of pseudo-regs we only
 430    need to iterate over the number of times a pseudo-reg is set, not over the
 431    number of basic blocks [clearly there is a bit of a slow down in the cases
 432    where a pseudo is set more than once in a block, however it is believed
 433    that the net effect is to speed things up].  This isn't done for hard-regs
 434    because recording call-clobbered hard-regs in `reg_set_table' at each
 435    function call can consume a fair bit of memory, and iterating over
 436    hard-regs stored this way in compute_transp will be more expensive.  */
 437
 438 typedef struct reg_set
 439 {
 440   /* The next setting of this register.  */
 441   struct reg_set *next;
 442   /* The insn where it was set.  */
 443   rtx insn;
 444 } reg_set;
 445
 446 static reg_set **reg_set_table;
 447
 448 /* Size of `reg_set_table'.
 449    The table starts out at max_gcse_regno + slop, and is enlarged as
 450    necessary.  */
 451 static int reg_set_table_size;
 452
 453 /* Amount to grow `reg_set_table' by when it's full.  */
 454 #define REG_SET_TABLE_SLOP 100
 455
 456 /* Bitmap containing one bit for each register in the program.
 457    Used when performing GCSE to track which registers have been set since
 458    the start of the basic block.  */
 459 static sbitmap reg_set_bitmap;
 460
 461 /* For each block, a bitmap of registers set in the block.
 462    This is used by expr_killed_p and compute_transp.
 463    It is computed during hash table computation and not by compute_sets
 464    as it includes registers added since the last pass (or between cprop and
 465    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 466 static sbitmap *reg_set_in_block;
 467
 468 /* For each block, non-zero if memory is set in that block.
 469    This is computed during hash table computation and is used by
 470    expr_killed_p and compute_transp.
 471    ??? Handling of memory is very simple, we don't make any attempt
 472    to optimize things (later).
 473    ??? This can be computed by compute_sets since the information
 474    doesn't change.  */
 475 static char *mem_set_in_block;
 476
 477 /* Various variables for statistics gathering.  */
 478
 479 /* Memory used in a pass.
 480    This isn't intended to be absolutely precise.  Its intent is only
 481    to keep an eye on memory usage.  */
 482 static int bytes_used;
 483
 484 /* GCSE substitutions made.  */
 485 static int gcse_subst_count;
 486 /* Number of copy instructions created.  */
 487 static int gcse_create_count;
 488 /* Number of constants propagated.  */
 489 static int const_prop_count;
 490 /* Number of copys propagated.  */
 491 static int copy_prop_count;
 492 \f
 493 /* These variables are used by classic GCSE.
 494    Normally they'd be defined a bit later, but `rd_gen' needs to
 495    be declared sooner.  */
 496
 497 /* Each block has a bitmap of each type.
 498    The length of each blocks bitmap is:
 499
 500        max_cuid  - for reaching definitions
 501        n_exprs - for available expressions
 502
 503    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 504    rd_kill[block_num][cuid_num]
 505    ae_kill[block_num][expr_num]                  */
 506
 507 /* For reaching defs */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* for available exprs */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines.  */
 515 struct null_pointer_info
 516 {
 517   /* The basic block being processed.  */
 518   int current_block;
 519   /* The first register to be handled in this pass.  */
 520   unsigned int min_reg;
 521   /* One greater than the last register to be handled in this pass.  */
 522   unsigned int max_reg;
 523   sbitmap *nonnull_local;
 524   sbitmap *nonnull_killed;
 525 };
 526 \f
 527 static void compute_can_copy    PARAMS ((void));
 528 static char *gmalloc            PARAMS ((unsigned int));
 529 static char *grealloc           PARAMS ((char *, unsigned int));
 530 static char *gcse_alloc         PARAMS ((unsigned long));
 531 static void alloc_gcse_mem      PARAMS ((rtx));
 532 static void free_gcse_mem       PARAMS ((void));
 533 static void alloc_reg_set_mem   PARAMS ((int));
 534 static void free_reg_set_mem    PARAMS ((void));
 535 static int get_bitmap_width     PARAMS ((int, int, int));
 536 static void record_one_set      PARAMS ((int, rtx));
 537 static void record_set_info     PARAMS ((rtx, rtx, void *));
 538 static void compute_sets        PARAMS ((rtx));
 539 static void hash_scan_insn      PARAMS ((rtx, int, int));
 540 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 541 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 542 static void hash_scan_call      PARAMS ((rtx, rtx));
 543 static int want_to_gcse_p       PARAMS ((rtx));
 544 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 545 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 546 static int oprs_available_p     PARAMS ((rtx, rtx));
 547 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 548                                           int, int));
 549 static void insert_set_in_table PARAMS ((rtx, rtx));
 550 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 551 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 552 static unsigned int hash_string_1 PARAMS ((const char *));
 553 static unsigned int hash_set    PARAMS ((int, int));
 554 static int expr_equiv_p         PARAMS ((rtx, rtx));
 555 static void record_last_reg_set_info PARAMS ((rtx, int));
 556 static void record_last_mem_set_info PARAMS ((rtx));
 557 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 558 static void compute_hash_table  PARAMS ((int));
 559 static void alloc_set_hash_table PARAMS ((int));
 560 static void free_set_hash_table PARAMS ((void));
 561 static void compute_set_hash_table PARAMS ((void));
 562 static void alloc_expr_hash_table PARAMS ((unsigned int));
 563 static void free_expr_hash_table PARAMS ((void));
 564 static void compute_expr_hash_table PARAMS ((void));
 565 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 566                                          int, int));
 567 static struct expr *lookup_expr PARAMS ((rtx));
 568 static struct expr *lookup_set  PARAMS ((unsigned int, rtx));
 569 static struct expr *next_set    PARAMS ((unsigned int, struct expr *));
 570 static void reset_opr_set_tables PARAMS ((void));
 571 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 572 static void mark_call           PARAMS ((rtx));
 573 static void mark_set            PARAMS ((rtx, rtx));
 574 static void mark_clobber        PARAMS ((rtx, rtx));
 575 static void mark_oprs_set       PARAMS ((rtx));
 576 static void alloc_cprop_mem     PARAMS ((int, int));
 577 static void free_cprop_mem      PARAMS ((void));
 578 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 579 static void compute_transpout   PARAMS ((void));
 580 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 581                                               int));
 582 static void compute_cprop_data  PARAMS ((void));
 583 static void find_used_regs      PARAMS ((rtx));
 584 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 585 static struct expr *find_avail_set PARAMS ((int, rtx));
 586 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 587 #ifdef HAVE_cc0
 588 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 589 #endif
 590 static int cprop_insn           PARAMS ((rtx, int));
 591 static int cprop                PARAMS ((int));
 592 static int one_cprop_pass       PARAMS ((int, int));
 593 static void alloc_pre_mem       PARAMS ((int, int));
 594 static void free_pre_mem        PARAMS ((void));
 595 static void compute_pre_data    PARAMS ((void));
 596 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 597 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 598 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 599 static void pre_insert_copies   PARAMS ((void));
 600 static int pre_delete           PARAMS ((void));
 601 static int pre_gcse             PARAMS ((void));
 602 static int one_pre_gcse_pass    PARAMS ((int));
 603 static void add_label_notes     PARAMS ((rtx, rtx));
 604 static void alloc_code_hoist_mem PARAMS ((int, int));
 605 static void free_code_hoist_mem PARAMS ((void));
 606 static void compute_code_hoist_vbeinout PARAMS ((void));
 607 static void compute_code_hoist_data PARAMS ((void));
 608 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 609 static void hoist_code          PARAMS ((void));
 610 static int one_code_hoisting_pass PARAMS ((void));
 611 static void alloc_rd_mem        PARAMS ((int, int));
 612 static void free_rd_mem         PARAMS ((void));
 613 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 614 static void compute_kill_rd     PARAMS ((void));
 615 static void compute_rd          PARAMS ((void));
 616 static void alloc_avail_expr_mem PARAMS ((int, int));
 617 static void free_avail_expr_mem PARAMS ((void));
 618 static void compute_ae_gen      PARAMS ((void));
 619 static int expr_killed_p        PARAMS ((rtx, int));
 620 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 621 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 622                                          int, int));
 623 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 624 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 625 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 626 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 627 static int classic_gcse         PARAMS ((void));
 628 static int one_classic_gcse_pass PARAMS ((int));
 629 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 630 static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
 631                                                   sbitmap *,
 632                                                   struct null_pointer_info *));
 633 static rtx process_insert_insn  PARAMS ((struct expr *));
 634 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 635 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 636                                              int, int, char *));
 637 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 638                                                  int, char *));
 639 \f
 640 /* Entry point for global common subexpression elimination.
 641    F is the first instruction in the function.  */
 642
 643 int
 644 gcse_main (f, file)
 645      rtx f;
 646      FILE *file;
 647 {
 648   int changed, pass;
 649   /* Bytes used at start of pass.  */
 650   int initial_bytes_used;
 651   /* Maximum number of bytes used by a pass.  */
 652   int max_pass_bytes;
 653   /* Point to release obstack data from for each pass.  */
 654   char *gcse_obstack_bottom;
 655
 656   /* We do not construct an accurate cfg in functions which call
 657      setjmp, so just punt to be safe.  */
 658   if (current_function_calls_setjmp)
 659     return 0;
 660
 661   /* Assume that we do not need to run jump optimizations after gcse.  */
 662   run_jump_opt_after_gcse = 0;
 663
 664   /* For calling dump_foo fns from gdb.  */
 665   debug_stderr = stderr;
 666   gcse_file = file;
 667
 668   /* Identify the basic block information for this function, including
 669      successors and predecessors.  */
 670   max_gcse_regno = max_reg_num ();
 671
 672   if (file)
 673     dump_flow_info (file);
 674
 675   /* Return if there's nothing to do.  */
 676   if (n_basic_blocks <= 1)
 677     return 0;
 678
 679   /* Trying to perform global optimizations on flow graphs which have
 680      a high connectivity will take a long time and is unlikely to be
 681      particularly useful.
 682
 683      In normal circumstances a cfg should have about twice has many edges
 684      as blocks.  But we do not want to punish small functions which have
 685      a couple switch statements.  So we require a relatively large number
 686      of basic blocks and the ratio of edges to blocks to be high.  */
 687   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 688     {
 689       if (warn_disabled_optimization)
 690       warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
 691                n_basic_blocks, n_edges / n_basic_blocks);
 692       return 0;
 693     }
 694
 695   /* See what modes support reg/reg copy operations.  */
 696   if (! can_copy_init_p)
 697     {
 698       compute_can_copy ();
 699       can_copy_init_p = 1;
 700     }
 701
 702   gcc_obstack_init (&gcse_obstack);
 703   bytes_used = 0;
 704
 705   /* Record where pseudo-registers are set.  This data is kept accurate
 706      during each pass.  ??? We could also record hard-reg information here
 707      [since it's unchanging], however it is currently done during hash table
 708      computation.
 709
 710      It may be tempting to compute MEM set information here too, but MEM sets
 711      will be subject to code motion one day and thus we need to compute
 712      information about memory sets when we build the hash tables.  */
 713
 714   alloc_reg_set_mem (max_gcse_regno);
 715   compute_sets (f);
 716
 717   pass = 0;
 718   initial_bytes_used = bytes_used;
 719   max_pass_bytes = 0;
 720   gcse_obstack_bottom = gcse_alloc (1);
 721   changed = 1;
 722   while (changed && pass < MAX_PASSES)
 723     {
 724       changed = 0;
 725       if (file)
 726         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 727
 728       /* Initialize bytes_used to the space for the pred/succ lists,
 729          and the reg_set_table data.  */
 730       bytes_used = initial_bytes_used;
 731
 732       /* Each pass may create new registers, so recalculate each time.  */
 733       max_gcse_regno = max_reg_num ();
 734
 735       alloc_gcse_mem (f);
 736
 737       /* Don't allow constant propagation to modify jumps
 738          during this pass.  */
 739       changed = one_cprop_pass (pass + 1, 0);
 740
 741       if (optimize_size)
 742         changed |= one_classic_gcse_pass (pass + 1);
 743       else
 744         {
 745           changed |= one_pre_gcse_pass (pass + 1);
 746           free_reg_set_mem ();
 747           alloc_reg_set_mem (max_reg_num ());
 748           compute_sets (f);
 749           run_jump_opt_after_gcse = 1;
 750         }
 751
 752       if (max_pass_bytes < bytes_used)
 753         max_pass_bytes = bytes_used;
 754
 755       /* Free up memory, then reallocate for code hoisting.  We can
 756          not re-use the existing allocated memory because the tables
 757          will not have info for the insns or registers created by
 758          partial redundancy elimination.  */
 759       free_gcse_mem ();
 760
 761       /* It does not make sense to run code hoisting unless we optimizing
 762          for code size -- it rarely makes programs faster, and can make
 763          them bigger if we did partial redundancy elimination (when optimizing
 764          for space, we use a classic gcse algorithm instead of partial
 765          redundancy algorithms).  */
 766       if (optimize_size)
 767         {
 768           max_gcse_regno = max_reg_num ();
 769           alloc_gcse_mem (f);
 770           changed |= one_code_hoisting_pass ();
 771           free_gcse_mem ();
 772
 773           if (max_pass_bytes < bytes_used)
 774             max_pass_bytes = bytes_used;
 775         }
 776
 777       if (file)
 778         {
 779           fprintf (file, "\n");
 780           fflush (file);
 781         }
 782
 783       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 784       pass++;
 785     }
 786
 787   /* Do one last pass of copy propagation, including cprop into
 788      conditional jumps.  */
 789
 790   max_gcse_regno = max_reg_num ();
 791   alloc_gcse_mem (f);
 792   /* This time, go ahead and allow cprop to alter jumps.  */
 793   one_cprop_pass (pass + 1, 1);
 794   free_gcse_mem ();
 795
 796   if (file)
 797     {
 798       fprintf (file, "GCSE of %s: %d basic blocks, ",
 799                current_function_name, n_basic_blocks);
 800       fprintf (file, "%d pass%s, %d bytes\n\n",
 801                pass, pass > 1 ? "es" : "", max_pass_bytes);
 802     }
 803
 804   obstack_free (&gcse_obstack, NULL_PTR);
 805   free_reg_set_mem ();
 806   return run_jump_opt_after_gcse;
 807 }
 808 \f
 809 /* Misc. utilities.  */
 810
 811 /* Compute which modes support reg/reg copy operations.  */
 812
 813 static void
 814 compute_can_copy ()
 815 {
 816   int i;
 817 #ifndef AVOID_CCMODE_COPIES
 818   rtx reg,insn;
 819 #endif
 820   memset (can_copy_p, 0, NUM_MACHINE_MODES);
 821
 822   start_sequence ();
 823   for (i = 0; i < NUM_MACHINE_MODES; i++)
 824     if (GET_MODE_CLASS (i) == MODE_CC)
 825       {
 826 #ifdef AVOID_CCMODE_COPIES
 827         can_copy_p[i] = 0;
 828 #else
 829         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 830         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 831         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 832           can_copy_p[i] = 1;
 833 #endif
 834       }
 835     else
 836       can_copy_p[i] = 1;
 837
 838   end_sequence ();
 839 }
 840 \f
 841 /* Cover function to xmalloc to record bytes allocated.  */
 842
 843 static char *
 844 gmalloc (size)
 845      unsigned int size;
 846 {
 847   bytes_used += size;
 848   return xmalloc (size);
 849 }
 850
 851 /* Cover function to xrealloc.
 852    We don't record the additional size since we don't know it.
 853    It won't affect memory usage stats much anyway.  */
 854
 855 static char *
 856 grealloc (ptr, size)
 857      char *ptr;
 858      unsigned int size;
 859 {
 860   return xrealloc (ptr, size);
 861 }
 862
 863 /* Cover function to obstack_alloc.
 864    We don't need to record the bytes allocated here since
 865    obstack_chunk_alloc is set to gmalloc.  */
 866
 867 static char *
 868 gcse_alloc (size)
 869      unsigned long size;
 870 {
 871   return (char *) obstack_alloc (&gcse_obstack, size);
 872 }
 873
 874 /* Allocate memory for the cuid mapping array,
 875    and reg/memory set tracking tables.
 876
 877    This is called at the start of each pass.  */
 878
 879 static void
 880 alloc_gcse_mem (f)
 881      rtx f;
 882 {
 883   int i,n;
 884   rtx insn;
 885
 886   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 887      CUIDs are like UIDs except they increase monotonically, have no gaps,
 888      and only apply to real insns.  */
 889
 890   max_uid = get_max_uid ();
 891   n = (max_uid + 1) * sizeof (int);
 892   uid_cuid = (int *) gmalloc (n);
 893   memset ((char *) uid_cuid, 0, n);
 894   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 895     {
 896       if (INSN_P (insn))
 897         uid_cuid[INSN_UID (insn)] = i++;
 898       else
 899         uid_cuid[INSN_UID (insn)] = i;
 900     }
 901
 902   /* Create a table mapping cuids to insns.  */
 903
 904   max_cuid = i;
 905   n = (max_cuid + 1) * sizeof (rtx);
 906   cuid_insn = (rtx *) gmalloc (n);
 907   memset ((char *) cuid_insn, 0, n);
 908   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 909     if (INSN_P (insn))
 910       CUID_INSN (i++) = insn;
 911
 912   /* Allocate vars to track sets of regs.  */
 913   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 914
 915   /* Allocate vars to track sets of regs, memory per block.  */
 916   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 917                                                        max_gcse_regno);
 918   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 919 }
 920
 921 /* Free memory allocated by alloc_gcse_mem.  */
 922
 923 static void
 924 free_gcse_mem ()
 925 {
 926   free (uid_cuid);
 927   free (cuid_insn);
 928
 929   free (reg_set_bitmap);
 930
 931   free (reg_set_in_block);
 932   free (mem_set_in_block);
 933 }
 934
 935 /* Many of the global optimization algorithms work by solving dataflow
 936    equations for various expressions.  Initially, some local value is
 937    computed for each expression in each block.  Then, the values across the
 938    various blocks are combined (by following flow graph edges) to arrive at
 939    global values.  Conceptually, each set of equations is independent.  We
 940    may therefore solve all the equations in parallel, solve them one at a
 941    time, or pick any intermediate approach.
 942
 943    When you're going to need N two-dimensional bitmaps, each X (say, the
 944    number of blocks) by Y (say, the number of expressions), call this
 945    function.  It's not important what X and Y represent; only that Y
 946    correspond to the things that can be done in parallel.  This function will
 947    return an appropriate chunking factor C; you should solve C sets of
 948    equations in parallel.  By going through this function, we can easily
 949    trade space against time; by solving fewer equations in parallel we use
 950    less space.  */
 951
 952 static int
 953 get_bitmap_width (n, x, y)
 954      int n;
 955      int x;
 956      int y;
 957 {
 958   /* It's not really worth figuring out *exactly* how much memory will
 959      be used by a particular choice.  The important thing is to get
 960      something approximately right.  */
 961   size_t max_bitmap_memory = 10 * 1024 * 1024;
 962
 963   /* The number of bytes we'd use for a single column of minimum
 964      width.  */
 965   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 966
 967   /* Often, it's reasonable just to solve all the equations in
 968      parallel.  */
 969   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 970     return y;
 971
 972   /* Otherwise, pick the largest width we can, without going over the
 973      limit.  */
 974   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 975                              / column_size);
 976 }
 977 \f
 978 /* Compute the local properties of each recorded expression.
 979
 980    Local properties are those that are defined by the block, irrespective of
 981    other blocks.
 982
 983    An expression is transparent in a block if its operands are not modified
 984    in the block.
 985
 986    An expression is computed (locally available) in a block if it is computed
 987    at least once and expression would contain the same value if the
 988    computation was moved to the end of the block.
 989
 990    An expression is locally anticipatable in a block if it is computed at
 991    least once and expression would contain the same value if the computation
 992    was moved to the beginning of the block.
 993
 994    We call this routine for cprop, pre and code hoisting.  They all compute
 995    basically the same information and thus can easily share this code.
 996
 997    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 998    properties.  If NULL, then it is not necessary to compute or record that
 999    particular property.
1000
1001    SETP controls which hash table to look at.  If zero, this routine looks at
1002    the expr hash table; if nonzero this routine looks at the set hash table.
1003    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1004    ABSALTERED.  */
1005
1006 static void
1007 compute_local_properties (transp, comp, antloc, setp)
1008      sbitmap *transp;
1009      sbitmap *comp;
1010      sbitmap *antloc;
1011      int setp;
1012 {
1013   unsigned int i, hash_table_size;
1014   struct expr **hash_table;
1015
1016   /* Initialize any bitmaps that were passed in.  */
1017   if (transp)
1018     {
1019       if (setp)
1020         sbitmap_vector_zero (transp, n_basic_blocks);
1021       else
1022         sbitmap_vector_ones (transp, n_basic_blocks);
1023     }
1024
1025   if (comp)
1026     sbitmap_vector_zero (comp, n_basic_blocks);
1027   if (antloc)
1028     sbitmap_vector_zero (antloc, n_basic_blocks);
1029
1030   /* We use the same code for cprop, pre and hoisting.  For cprop
1031      we care about the set hash table, for pre and hoisting we
1032      care about the expr hash table.  */
1033   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1034   hash_table = setp ? set_hash_table : expr_hash_table;
1035
1036   for (i = 0; i < hash_table_size; i++)
1037     {
1038       struct expr *expr;
1039
1040       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1041         {
1042           int indx = expr->bitmap_index;
1043           struct occr *occr;
1044
1045           /* The expression is transparent in this block if it is not killed.
1046              We start by assuming all are transparent [none are killed], and
1047              then reset the bits for those that are.  */
1048           if (transp)
1049             compute_transp (expr->expr, indx, transp, setp);
1050
1051           /* The occurrences recorded in antic_occr are exactly those that
1052              we want to set to non-zero in ANTLOC.  */
1053           if (antloc)
1054             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1055               {
1056                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1057
1058                 /* While we're scanning the table, this is a good place to
1059                    initialize this.  */
1060                 occr->deleted_p = 0;
1061               }
1062
1063           /* The occurrences recorded in avail_occr are exactly those that
1064              we want to set to non-zero in COMP.  */
1065           if (comp)
1066             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1067               {
1068                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1069
1070                 /* While we're scanning the table, this is a good place to
1071                    initialize this.  */
1072                 occr->copied_p = 0;
1073               }
1074
1075           /* While we're scanning the table, this is a good place to
1076              initialize this.  */
1077           expr->reaching_reg = 0;
1078         }
1079     }
1080 }
1081 \f
1082 /* Register set information.
1083
1084    `reg_set_table' records where each register is set or otherwise
1085    modified.  */
1086
1087 static struct obstack reg_set_obstack;
1088
1089 static void
1090 alloc_reg_set_mem (n_regs)
1091      int n_regs;
1092 {
1093   unsigned int n;
1094
1095   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1096   n = reg_set_table_size * sizeof (struct reg_set *);
1097   reg_set_table = (struct reg_set **) gmalloc (n);
1098   memset ((char *) reg_set_table, 0, n);
1099
1100   gcc_obstack_init (&reg_set_obstack);
1101 }
1102
1103 static void
1104 free_reg_set_mem ()
1105 {
1106   free (reg_set_table);
1107   obstack_free (&reg_set_obstack, NULL_PTR);
1108 }
1109
1110 /* Record REGNO in the reg_set table.  */
1111
1112 static void
1113 record_one_set (regno, insn)
1114      int regno;
1115      rtx insn;
1116 {
1117   /* allocate a new reg_set element and link it onto the list */
1118   struct reg_set *new_reg_info;
1119
1120   /* If the table isn't big enough, enlarge it.  */
1121   if (regno >= reg_set_table_size)
1122     {
1123       int new_size = regno + REG_SET_TABLE_SLOP;
1124
1125       reg_set_table
1126         = (struct reg_set **) grealloc ((char *) reg_set_table,
1127                                         new_size * sizeof (struct reg_set *));
1128       memset ((char *) (reg_set_table + reg_set_table_size), 0,
1129              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1130       reg_set_table_size = new_size;
1131     }
1132
1133   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1134                                                    sizeof (struct reg_set));
1135   bytes_used += sizeof (struct reg_set);
1136   new_reg_info->insn = insn;
1137   new_reg_info->next = reg_set_table[regno];
1138   reg_set_table[regno] = new_reg_info;
1139 }
1140
1141 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1142    an insn.  The DATA is really the instruction in which the SET is
1143    occurring.  */
1144
1145 static void
1146 record_set_info (dest, setter, data)
1147      rtx dest, setter ATTRIBUTE_UNUSED;
1148      void *data;
1149 {
1150   rtx record_set_insn = (rtx) data;
1151
1152   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1153     record_one_set (REGNO (dest), record_set_insn);
1154 }
1155
1156 /* Scan the function and record each set of each pseudo-register.
1157
1158    This is called once, at the start of the gcse pass.  See the comments for
1159    `reg_set_table' for further documenation.  */
1160
1161 static void
1162 compute_sets (f)
1163      rtx f;
1164 {
1165   rtx insn;
1166
1167   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1168     if (INSN_P (insn))
1169       note_stores (PATTERN (insn), record_set_info, insn);
1170 }
1171 \f
1172 /* Hash table support.  */
1173
1174 /* For each register, the cuid of the first/last insn in the block to set it,
1175    or -1 if not set.  */
1176 #define NEVER_SET -1
1177 static int *reg_first_set;
1178 static int *reg_last_set;
1179
1180 /* While computing "first/last set" info, this is the CUID of first/last insn
1181    to set memory or -1 if not set.  `mem_last_set' is also used when
1182    performing GCSE to record whether memory has been set since the beginning
1183    of the block.
1184
1185    Note that handling of memory is very simple, we don't make any attempt
1186    to optimize things (later).  */
1187 static int mem_first_set;
1188 static int mem_last_set;
1189
1190 /* Perform a quick check whether X, the source of a set, is something
1191    we want to consider for GCSE.  */
1192
1193 static int
1194 want_to_gcse_p (x)
1195      rtx x;
1196 {
1197   switch (GET_CODE (x))
1198     {
1199     case REG:
1200     case SUBREG:
1201     case CONST_INT:
1202     case CONST_DOUBLE:
1203     case CALL:
1204       return 0;
1205
1206     default:
1207       break;
1208     }
1209
1210   return 1;
1211 }
1212
1213 /* Return non-zero if the operands of expression X are unchanged from the
1214    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1215    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1216
1217 static int
1218 oprs_unchanged_p (x, insn, avail_p)
1219      rtx x, insn;
1220      int avail_p;
1221 {
1222   int i, j;
1223   enum rtx_code code;
1224   const char *fmt;
1225
1226   if (x == 0)
1227     return 1;
1228
1229   code = GET_CODE (x);
1230   switch (code)
1231     {
1232     case REG:
1233       if (avail_p)
1234         return (reg_last_set[REGNO (x)] == NEVER_SET
1235                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1236       else
1237         return (reg_first_set[REGNO (x)] == NEVER_SET
1238                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1239
1240     case MEM:
1241       if (avail_p && mem_last_set != NEVER_SET
1242           && mem_last_set >= INSN_CUID (insn))
1243         return 0;
1244       else if (! avail_p && mem_first_set != NEVER_SET
1245                && mem_first_set < INSN_CUID (insn))
1246         return 0;
1247       else
1248         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1249
1250     case PRE_DEC:
1251     case PRE_INC:
1252     case POST_DEC:
1253     case POST_INC:
1254     case PRE_MODIFY:
1255     case POST_MODIFY:
1256       return 0;
1257
1258     case PC:
1259     case CC0: /*FIXME*/
1260     case CONST:
1261     case CONST_INT:
1262     case CONST_DOUBLE:
1263     case SYMBOL_REF:
1264     case LABEL_REF:
1265     case ADDR_VEC:
1266     case ADDR_DIFF_VEC:
1267       return 1;
1268
1269     default:
1270       break;
1271     }
1272
1273   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1274     {
1275       if (fmt[i] == 'e')
1276         {
1277           /* If we are about to do the last recursive call needed at this
1278              level, change it into iteration.  This function is called enough
1279              to be worth it.  */
1280           if (i == 0)
1281             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1282
1283           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1284             return 0;
1285         }
1286       else if (fmt[i] == 'E')
1287         for (j = 0; j < XVECLEN (x, i); j++)
1288           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1289             return 0;
1290     }
1291
1292   return 1;
1293 }
1294
1295 /* Return non-zero if the operands of expression X are unchanged from
1296    the start of INSN's basic block up to but not including INSN.  */
1297
1298 static int
1299 oprs_anticipatable_p (x, insn)
1300      rtx x, insn;
1301 {
1302   return oprs_unchanged_p (x, insn, 0);
1303 }
1304
1305 /* Return non-zero if the operands of expression X are unchanged from
1306    INSN to the end of INSN's basic block.  */
1307
1308 static int
1309 oprs_available_p (x, insn)
1310      rtx x, insn;
1311 {
1312   return oprs_unchanged_p (x, insn, 1);
1313 }
1314
1315 /* Hash expression X.
1316
1317    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1318    indicating if a volatile operand is found or if the expression contains
1319    something we don't want to insert in the table.
1320
1321    ??? One might want to merge this with canon_hash.  Later.  */
1322
1323 static unsigned int
1324 hash_expr (x, mode, do_not_record_p, hash_table_size)
1325      rtx x;
1326      enum machine_mode mode;
1327      int *do_not_record_p;
1328      int hash_table_size;
1329 {
1330   unsigned int hash;
1331
1332   *do_not_record_p = 0;
1333
1334   hash = hash_expr_1 (x, mode, do_not_record_p);
1335   return hash % hash_table_size;
1336 }
1337 /* Hash a string.  Just add its bytes up.  */
1338 static inline unsigned
1339 hash_string_1 (ps)
1340      const char *ps;
1341 {
1342   unsigned hash = 0;
1343   const unsigned char *p = (const unsigned char *)ps;
1344
1345   if (p)
1346     while (*p)
1347       hash += *p++;
1348
1349   return hash;
1350 }
1351
1352 /* Subroutine of hash_expr to do the actual work.  */
1353
1354 static unsigned int
1355 hash_expr_1 (x, mode, do_not_record_p)
1356      rtx x;
1357      enum machine_mode mode;
1358      int *do_not_record_p;
1359 {
1360   int i, j;
1361   unsigned hash = 0;
1362   enum rtx_code code;
1363   const char *fmt;
1364
1365   /* Used to turn recursion into iteration.  We can't rely on GCC's
1366      tail-recursion eliminatio since we need to keep accumulating values
1367      in HASH.  */
1368
1369   if (x == 0)
1370     return hash;
1371
1372  repeat:
1373   code = GET_CODE (x);
1374   switch (code)
1375     {
1376     case REG:
1377       hash += ((unsigned int) REG << 7) + REGNO (x);
1378       return hash;
1379
1380     case CONST_INT:
1381       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1382                + (unsigned int) INTVAL (x));
1383       return hash;
1384
1385     case CONST_DOUBLE:
1386       /* This is like the general case, except that it only counts
1387          the integers representing the constant.  */
1388       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1389       if (GET_MODE (x) != VOIDmode)
1390         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1391           hash += (unsigned int) XWINT (x, i);
1392       else
1393         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1394                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1395       return hash;
1396
1397       /* Assume there is only one rtx object for any given label.  */
1398     case LABEL_REF:
1399       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1400          differences and differences between each stage's debugging dumps.  */
1401       hash += (((unsigned int) LABEL_REF << 7)
1402                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1403       return hash;
1404
1405     case SYMBOL_REF:
1406       {
1407         /* Don't hash on the symbol's address to avoid bootstrap differences.
1408            Different hash values may cause expressions to be recorded in
1409            different orders and thus different registers to be used in the
1410            final assembler.  This also avoids differences in the dump files
1411            between various stages.  */
1412         unsigned int h = 0;
1413         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1414
1415         while (*p)
1416           h += (h << 7) + *p++; /* ??? revisit */
1417
1418         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1419         return hash;
1420       }
1421
1422     case MEM:
1423       if (MEM_VOLATILE_P (x))
1424         {
1425           *do_not_record_p = 1;
1426           return 0;
1427         }
1428
1429       hash += (unsigned int) MEM;
1430       hash += MEM_ALIAS_SET (x);
1431       x = XEXP (x, 0);
1432       goto repeat;
1433
1434     case PRE_DEC:
1435     case PRE_INC:
1436     case POST_DEC:
1437     case POST_INC:
1438     case PC:
1439     case CC0:
1440     case CALL:
1441     case UNSPEC_VOLATILE:
1442       *do_not_record_p = 1;
1443       return 0;
1444
1445     case ASM_OPERANDS:
1446       if (MEM_VOLATILE_P (x))
1447         {
1448           *do_not_record_p = 1;
1449           return 0;
1450         }
1451       else
1452         {
1453           /* We don't want to take the filename and line into account.  */
1454           hash += (unsigned) code + (unsigned) GET_MODE (x)
1455             + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
1456             + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
1457             + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);
1458
1459           if (ASM_OPERANDS_INPUT_LENGTH (x))
1460             {
1461               for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
1462                 {
1463                   hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
1464                                         GET_MODE (ASM_OPERANDS_INPUT (x, i)),
1465                                         do_not_record_p)
1466                            + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
1467                                             (x, i)));
1468                 }
1469
1470               hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
1471               x = ASM_OPERANDS_INPUT (x, 0);
1472               mode = GET_MODE (x);
1473               goto repeat;
1474             }
1475           return hash;
1476         }
1477
1478     default:
1479       break;
1480     }
1481
1482   hash += (unsigned) code + (unsigned) GET_MODE (x);
1483   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1484     {
1485       if (fmt[i] == 'e')
1486         {
1487           /* If we are about to do the last recursive call
1488              needed at this level, change it into iteration.
1489              This function is called enough to be worth it.  */
1490           if (i == 0)
1491             {
1492               x = XEXP (x, i);
1493               goto repeat;
1494             }
1495
1496           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1497           if (*do_not_record_p)
1498             return 0;
1499         }
1500
1501       else if (fmt[i] == 'E')
1502         for (j = 0; j < XVECLEN (x, i); j++)
1503           {
1504             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1505             if (*do_not_record_p)
1506               return 0;
1507           }
1508
1509       else if (fmt[i] == 's')
1510         hash += hash_string_1 (XSTR (x, i));
1511       else if (fmt[i] == 'i')
1512         hash += (unsigned int) XINT (x, i);
1513       else
1514         abort ();
1515     }
1516
1517   return hash;
1518 }
1519
1520 /* Hash a set of register REGNO.
1521
1522    Sets are hashed on the register that is set.  This simplifies the PRE copy
1523    propagation code.
1524
1525    ??? May need to make things more elaborate.  Later, as necessary.  */
1526
1527 static unsigned int
1528 hash_set (regno, hash_table_size)
1529      int regno;
1530      int hash_table_size;
1531 {
1532   unsigned int hash;
1533
1534   hash = regno;
1535   return hash % hash_table_size;
1536 }
1537
1538 /* Return non-zero if exp1 is equivalent to exp2.
1539    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1540
1541 static int
1542 expr_equiv_p (x, y)
1543      rtx x, y;
1544 {
1545   register int i, j;
1546   register enum rtx_code code;
1547   register const char *fmt;
1548
1549   if (x == y)
1550     return 1;
1551
1552   if (x == 0 || y == 0)
1553     return x == y;
1554
1555   code = GET_CODE (x);
1556   if (code != GET_CODE (y))
1557     return 0;
1558
1559   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1560   if (GET_MODE (x) != GET_MODE (y))
1561     return 0;
1562
1563   switch (code)
1564     {
1565     case PC:
1566     case CC0:
1567       return x == y;
1568
1569     case CONST_INT:
1570       return INTVAL (x) == INTVAL (y);
1571
1572     case LABEL_REF:
1573       return XEXP (x, 0) == XEXP (y, 0);
1574
1575     case SYMBOL_REF:
1576       return XSTR (x, 0) == XSTR (y, 0);
1577
1578     case REG:
1579       return REGNO (x) == REGNO (y);
1580
1581     case MEM:
1582       /* Can't merge two expressions in different alias sets, since we can
1583          decide that the expression is transparent in a block when it isn't,
1584          due to it being set with the different alias set.  */
1585       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1586         return 0;
1587       break;
1588
1589     /*  For commutative operations, check both orders.  */
1590     case PLUS:
1591     case MULT:
1592     case AND:
1593     case IOR:
1594     case XOR:
1595     case NE:
1596     case EQ:
1597       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1598                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1599               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1600                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1601
1602     case ASM_OPERANDS:
1603       /* We don't use the generic code below because we want to
1604          disregard filename and line numbers.  */
1605
1606       /* A volatile asm isn't equivalent to any other.  */
1607       if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
1608         return 0;
1609
1610       if (GET_MODE (x) != GET_MODE (y)
1611           || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
1612           || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
1613                      ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
1614           || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
1615           || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
1616         return 0;
1617
1618       if (ASM_OPERANDS_INPUT_LENGTH (x))
1619         {
1620           for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
1621             if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
1622                                 ASM_OPERANDS_INPUT (y, i))
1623                 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
1624                            ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
1625               return 0;
1626         }
1627
1628       return 1;
1629
1630     default:
1631       break;
1632     }
1633
1634   /* Compare the elements.  If any pair of corresponding elements
1635      fail to match, return 0 for the whole thing.  */
1636
1637   fmt = GET_RTX_FORMAT (code);
1638   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1639     {
1640       switch (fmt[i])
1641         {
1642         case 'e':
1643           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1644             return 0;
1645           break;
1646
1647         case 'E':
1648           if (XVECLEN (x, i) != XVECLEN (y, i))
1649             return 0;
1650           for (j = 0; j < XVECLEN (x, i); j++)
1651             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1652               return 0;
1653           break;
1654
1655         case 's':
1656           if (strcmp (XSTR (x, i), XSTR (y, i)))
1657             return 0;
1658           break;
1659
1660         case 'i':
1661           if (XINT (x, i) != XINT (y, i))
1662             return 0;
1663           break;
1664
1665         case 'w':
1666           if (XWINT (x, i) != XWINT (y, i))
1667             return 0;
1668         break;
1669
1670         case '0':
1671           break;
1672
1673         default:
1674           abort ();
1675         }
1676       }
1677
1678   return 1;
1679 }
1680
1681 /* Insert expression X in INSN in the hash table.
1682    If it is already present, record it as the last occurrence in INSN's
1683    basic block.
1684
1685    MODE is the mode of the value X is being stored into.
1686    It is only used if X is a CONST_INT.
1687
1688    ANTIC_P is non-zero if X is an anticipatable expression.
1689    AVAIL_P is non-zero if X is an available expression.  */
1690
1691 static void
1692 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1693      rtx x;
1694      enum machine_mode mode;
1695      rtx insn;
1696      int antic_p, avail_p;
1697 {
1698   int found, do_not_record_p;
1699   unsigned int hash;
1700   struct expr *cur_expr, *last_expr = NULL;
1701   struct occr *antic_occr, *avail_occr;
1702   struct occr *last_occr = NULL;
1703
1704   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1705
1706   /* Do not insert expression in table if it contains volatile operands,
1707      or if hash_expr determines the expression is something we don't want
1708      to or can't handle.  */
1709   if (do_not_record_p)
1710     return;
1711
1712   cur_expr = expr_hash_table[hash];
1713   found = 0;
1714
1715   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1716     {
1717       /* If the expression isn't found, save a pointer to the end of
1718          the list.  */
1719       last_expr = cur_expr;
1720       cur_expr = cur_expr->next_same_hash;
1721     }
1722
1723   if (! found)
1724     {
1725       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1726       bytes_used += sizeof (struct expr);
1727       if (expr_hash_table[hash] == NULL)
1728         /* This is the first pattern that hashed to this index.  */
1729         expr_hash_table[hash] = cur_expr;
1730       else
1731         /* Add EXPR to end of this hash chain.  */
1732         last_expr->next_same_hash = cur_expr;
1733
1734       /* Set the fields of the expr element.  */
1735       cur_expr->expr = x;
1736       cur_expr->bitmap_index = n_exprs++;
1737       cur_expr->next_same_hash = NULL;
1738       cur_expr->antic_occr = NULL;
1739       cur_expr->avail_occr = NULL;
1740     }
1741
1742   /* Now record the occurrence(s).  */
1743   if (antic_p)
1744     {
1745       antic_occr = cur_expr->antic_occr;
1746
1747       /* Search for another occurrence in the same basic block.  */
1748       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1749         {
1750           /* If an occurrence isn't found, save a pointer to the end of
1751              the list.  */
1752           last_occr = antic_occr;
1753           antic_occr = antic_occr->next;
1754         }
1755
1756       if (antic_occr)
1757         /* Found another instance of the expression in the same basic block.
1758            Prefer the currently recorded one.  We want the first one in the
1759            block and the block is scanned from start to end.  */
1760         ; /* nothing to do */
1761       else
1762         {
1763           /* First occurrence of this expression in this basic block.  */
1764           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1765           bytes_used += sizeof (struct occr);
1766           /* First occurrence of this expression in any block?  */
1767           if (cur_expr->antic_occr == NULL)
1768             cur_expr->antic_occr = antic_occr;
1769           else
1770             last_occr->next = antic_occr;
1771
1772           antic_occr->insn = insn;
1773           antic_occr->next = NULL;
1774         }
1775     }
1776
1777   if (avail_p)
1778     {
1779       avail_occr = cur_expr->avail_occr;
1780
1781       /* Search for another occurrence in the same basic block.  */
1782       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1783         {
1784           /* If an occurrence isn't found, save a pointer to the end of
1785              the list.  */
1786           last_occr = avail_occr;
1787           avail_occr = avail_occr->next;
1788         }
1789
1790       if (avail_occr)
1791         /* Found another instance of the expression in the same basic block.
1792            Prefer this occurrence to the currently recorded one.  We want
1793            the last one in the block and the block is scanned from start
1794            to end.  */
1795         avail_occr->insn = insn;
1796       else
1797         {
1798           /* First occurrence of this expression in this basic block.  */
1799           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1800           bytes_used += sizeof (struct occr);
1801
1802           /* First occurrence of this expression in any block?  */
1803           if (cur_expr->avail_occr == NULL)
1804             cur_expr->avail_occr = avail_occr;
1805           else
1806             last_occr->next = avail_occr;
1807
1808           avail_occr->insn = insn;
1809           avail_occr->next = NULL;
1810         }
1811     }
1812 }
1813
1814 /* Insert pattern X in INSN in the hash table.
1815    X is a SET of a reg to either another reg or a constant.
1816    If it is already present, record it as the last occurrence in INSN's
1817    basic block.  */
1818
1819 static void
1820 insert_set_in_table (x, insn)
1821      rtx x;
1822      rtx insn;
1823 {
1824   int found;
1825   unsigned int hash;
1826   struct expr *cur_expr, *last_expr = NULL;
1827   struct occr *cur_occr, *last_occr = NULL;
1828
1829   if (GET_CODE (x) != SET
1830       || GET_CODE (SET_DEST (x)) != REG)
1831     abort ();
1832
1833   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1834
1835   cur_expr = set_hash_table[hash];
1836   found = 0;
1837
1838   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1839     {
1840       /* If the expression isn't found, save a pointer to the end of
1841          the list.  */
1842       last_expr = cur_expr;
1843       cur_expr = cur_expr->next_same_hash;
1844     }
1845
1846   if (! found)
1847     {
1848       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1849       bytes_used += sizeof (struct expr);
1850       if (set_hash_table[hash] == NULL)
1851         /* This is the first pattern that hashed to this index.  */
1852         set_hash_table[hash] = cur_expr;
1853       else
1854         /* Add EXPR to end of this hash chain.  */
1855         last_expr->next_same_hash = cur_expr;
1856
1857       /* Set the fields of the expr element.
1858          We must copy X because it can be modified when copy propagation is
1859          performed on its operands.  */
1860       /* ??? Should this go in a different obstack?  */
1861       cur_expr->expr = copy_rtx (x);
1862       cur_expr->bitmap_index = n_sets++;
1863       cur_expr->next_same_hash = NULL;
1864       cur_expr->antic_occr = NULL;
1865       cur_expr->avail_occr = NULL;
1866     }
1867
1868   /* Now record the occurrence.  */
1869   cur_occr = cur_expr->avail_occr;
1870
1871   /* Search for another occurrence in the same basic block.  */
1872   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1873     {
1874       /* If an occurrence isn't found, save a pointer to the end of
1875          the list.  */
1876       last_occr = cur_occr;
1877       cur_occr = cur_occr->next;
1878     }
1879
1880   if (cur_occr)
1881     /* Found another instance of the expression in the same basic block.
1882        Prefer this occurrence to the currently recorded one.  We want the
1883        last one in the block and the block is scanned from start to end.  */
1884     cur_occr->insn = insn;
1885   else
1886     {
1887       /* First occurrence of this expression in this basic block.  */
1888       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1889       bytes_used += sizeof (struct occr);
1890
1891       /* First occurrence of this expression in any block?  */
1892       if (cur_expr->avail_occr == NULL)
1893         cur_expr->avail_occr = cur_occr;
1894       else
1895         last_occr->next = cur_occr;
1896
1897       cur_occr->insn = insn;
1898       cur_occr->next = NULL;
1899     }
1900 }
1901
1902 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1903    non-zero, this is for the assignment hash table, otherwise it is for the
1904    expression hash table.  */
1905
1906 static void
1907 hash_scan_set (pat, insn, set_p)
1908      rtx pat, insn;
1909      int set_p;
1910 {
1911   rtx src = SET_SRC (pat);
1912   rtx dest = SET_DEST (pat);
1913
1914   if (GET_CODE (src) == CALL)
1915     hash_scan_call (src, insn);
1916
1917   if (GET_CODE (dest) == REG)
1918     {
1919       int regno = REGNO (dest);
1920       rtx tmp;
1921
1922       /* Only record sets of pseudo-regs in the hash table.  */
1923       if (! set_p
1924           && regno >= FIRST_PSEUDO_REGISTER
1925           /* Don't GCSE something if we can't do a reg/reg copy.  */
1926           && can_copy_p [GET_MODE (dest)]
1927           /* Is SET_SRC something we want to gcse?  */
1928           && want_to_gcse_p (src))
1929         {
1930           /* An expression is not anticipatable if its operands are
1931              modified before this insn.  */
1932           int antic_p = oprs_anticipatable_p (src, insn);
1933           /* An expression is not available if its operands are
1934              subsequently modified, including this insn.  */
1935           int avail_p = oprs_available_p (src, insn);
1936
1937           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1938         }
1939
1940       /* Record sets for constant/copy propagation.  */
1941       else if (set_p
1942                && regno >= FIRST_PSEUDO_REGISTER
1943                && ((GET_CODE (src) == REG
1944                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1945                     && can_copy_p [GET_MODE (dest)])
1946                    || GET_CODE (src) == CONST_INT
1947                    || GET_CODE (src) == SYMBOL_REF
1948                    || GET_CODE (src) == CONST_DOUBLE)
1949                /* A copy is not available if its src or dest is subsequently
1950                   modified.  Here we want to search from INSN+1 on, but
1951                   oprs_available_p searches from INSN on.  */
1952                && (insn == BLOCK_END (BLOCK_NUM (insn))
1953                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1954                        && oprs_available_p (pat, tmp))))
1955         insert_set_in_table (pat, insn);
1956     }
1957 }
1958
1959 static void
1960 hash_scan_clobber (x, insn)
1961      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1962 {
1963   /* Currently nothing to do.  */
1964 }
1965
1966 static void
1967 hash_scan_call (x, insn)
1968      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1969 {
1970   /* Currently nothing to do.  */
1971 }
1972
1973 /* Process INSN and add hash table entries as appropriate.
1974
1975    Only available expressions that set a single pseudo-reg are recorded.
1976
1977    Single sets in a PARALLEL could be handled, but it's an extra complication
1978    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1979    are also in the PARALLEL.  Later.
1980
1981    If SET_P is non-zero, this is for the assignment hash table,
1982    otherwise it is for the expression hash table.
1983    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1984    not record any expressions.  */
1985
1986 static void
1987 hash_scan_insn (insn, set_p, in_libcall_block)
1988      rtx insn;
1989      int set_p;
1990      int in_libcall_block;
1991 {
1992   rtx pat = PATTERN (insn);
1993   int i;
1994
1995   /* Pick out the sets of INSN and for other forms of instructions record
1996      what's been modified.  */
1997
1998   if (GET_CODE (pat) == SET && ! in_libcall_block)
1999     {
2000       /* Ignore obvious no-ops.  */
2001       if (SET_SRC (pat) != SET_DEST (pat))
2002         hash_scan_set (pat, insn, set_p);
2003     }
2004   else if (GET_CODE (pat) == PARALLEL)
2005     for (i = 0; i < XVECLEN (pat, 0); i++)
2006       {
2007         rtx x = XVECEXP (pat, 0, i);
2008
2009         if (GET_CODE (x) == SET)
2010           {
2011             if (GET_CODE (SET_SRC (x)) == CALL)
2012               hash_scan_call (SET_SRC (x), insn);
2013           }
2014         else if (GET_CODE (x) == CLOBBER)
2015           hash_scan_clobber (x, insn);
2016         else if (GET_CODE (x) == CALL)
2017           hash_scan_call (x, insn);
2018       }
2019
2020   else if (GET_CODE (pat) == CLOBBER)
2021     hash_scan_clobber (pat, insn);
2022   else if (GET_CODE (pat) == CALL)
2023     hash_scan_call (pat, insn);
2024 }
2025
2026 static void
2027 dump_hash_table (file, name, table, table_size, total_size)
2028      FILE *file;
2029      const char *name;
2030      struct expr **table;
2031      int table_size, total_size;
2032 {
2033   int i;
2034   /* Flattened out table, so it's printed in proper order.  */
2035   struct expr **flat_table;
2036   unsigned int *hash_val;
2037   struct expr *expr;
2038
2039   flat_table
2040     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2041   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2042
2043   for (i = 0; i < table_size; i++)
2044     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2045       {
2046         flat_table[expr->bitmap_index] = expr;
2047         hash_val[expr->bitmap_index] = i;
2048       }
2049
2050   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2051            name, table_size, total_size);
2052
2053   for (i = 0; i < total_size; i++)
2054     if (flat_table[i] != 0)
2055       {
2056         expr = flat_table[i];
2057         fprintf (file, "Index %d (hash value %d)\n  ",
2058                  expr->bitmap_index, hash_val[i]);
2059         print_rtl (file, expr->expr);
2060         fprintf (file, "\n");
2061       }
2062
2063   fprintf (file, "\n");
2064
2065   free (flat_table);
2066   free (hash_val);
2067 }
2068
2069 /* Record register first/last/block set information for REGNO in INSN.
2070
2071    reg_first_set records the first place in the block where the register
2072    is set and is used to compute "anticipatability".
2073
2074    reg_last_set records the last place in the block where the register
2075    is set and is used to compute "availability".
2076
2077    reg_set_in_block records whether the register is set in the block
2078    and is used to compute "transparency".  */
2079
2080 static void
2081 record_last_reg_set_info (insn, regno)
2082      rtx insn;
2083      int regno;
2084 {
2085   if (reg_first_set[regno] == NEVER_SET)
2086     reg_first_set[regno] = INSN_CUID (insn);
2087
2088   reg_last_set[regno] = INSN_CUID (insn);
2089   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2090 }
2091
2092 /* Record memory first/last/block set information for INSN.  */
2093
2094 static void
2095 record_last_mem_set_info (insn)
2096      rtx insn;
2097 {
2098   if (mem_first_set == NEVER_SET)
2099     mem_first_set = INSN_CUID (insn);
2100
2101   mem_last_set = INSN_CUID (insn);
2102   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2103 }
2104
2105 /* Called from compute_hash_table via note_stores to handle one
2106    SET or CLOBBER in an insn.  DATA is really the instruction in which
2107    the SET is taking place.  */
2108
2109 static void
2110 record_last_set_info (dest, setter, data)
2111      rtx dest, setter ATTRIBUTE_UNUSED;
2112      void *data;
2113 {
2114   rtx last_set_insn = (rtx) data;
2115
2116   if (GET_CODE (dest) == SUBREG)
2117     dest = SUBREG_REG (dest);
2118
2119   if (GET_CODE (dest) == REG)
2120     record_last_reg_set_info (last_set_insn, REGNO (dest));
2121   else if (GET_CODE (dest) == MEM
2122            /* Ignore pushes, they clobber nothing.  */
2123            && ! push_operand (dest, GET_MODE (dest)))
2124     record_last_mem_set_info (last_set_insn);
2125 }
2126
2127 /* Top level function to create an expression or assignment hash table.
2128
2129    Expression entries are placed in the hash table if
2130    - they are of the form (set (pseudo-reg) src),
2131    - src is something we want to perform GCSE on,
2132    - none of the operands are subsequently modified in the block
2133
2134    Assignment entries are placed in the hash table if
2135    - they are of the form (set (pseudo-reg) src),
2136    - src is something we want to perform const/copy propagation on,
2137    - none of the operands or target are subsequently modified in the block
2138
2139    Currently src must be a pseudo-reg or a const_int.
2140
2141    F is the first insn.
2142    SET_P is non-zero for computing the assignment hash table.  */
2143
2144 static void
2145 compute_hash_table (set_p)
2146      int set_p;
2147 {
2148   int bb;
2149
2150   /* While we compute the hash table we also compute a bit array of which
2151      registers are set in which blocks.
2152      We also compute which blocks set memory, in the absence of aliasing
2153      support [which is TODO].
2154      ??? This isn't needed during const/copy propagation, but it's cheap to
2155      compute.  Later.  */
2156   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2157   memset ((char *) mem_set_in_block, 0, n_basic_blocks);
2158
2159   /* Some working arrays used to track first and last set in each block.  */
2160   /* ??? One could use alloca here, but at some size a threshold is crossed
2161      beyond which one should use malloc.  Are we at that threshold here?  */
2162   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2163   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2164
2165   for (bb = 0; bb < n_basic_blocks; bb++)
2166     {
2167       rtx insn;
2168       unsigned int regno;
2169       int in_libcall_block;
2170       unsigned int i;
2171
2172       /* First pass over the instructions records information used to
2173          determine when registers and memory are first and last set.
2174          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2175          could be moved to compute_sets since they currently don't change.  */
2176
2177       for (i = 0; i < max_gcse_regno; i++)
2178         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2179
2180       mem_first_set = NEVER_SET;
2181       mem_last_set = NEVER_SET;
2182
2183       for (insn = BLOCK_HEAD (bb);
2184            insn && insn != NEXT_INSN (BLOCK_END (bb));
2185            insn = NEXT_INSN (insn))
2186         {
2187 #ifdef NON_SAVING_SETJMP
2188           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2189               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2190             {
2191               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2192                 record_last_reg_set_info (insn, regno);
2193               continue;
2194             }
2195 #endif
2196
2197           if (! INSN_P (insn))
2198             continue;
2199
2200           if (GET_CODE (insn) == CALL_INSN)
2201             {
2202               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2203                 if ((call_used_regs[regno]
2204                      && regno != STACK_POINTER_REGNUM
2205 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2206                      && regno != HARD_FRAME_POINTER_REGNUM
2207 #endif
2208 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2209                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2210 #endif
2211 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2212                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2213 #endif
2214
2215                      && regno != FRAME_POINTER_REGNUM)
2216                     || global_regs[regno])
2217                   record_last_reg_set_info (insn, regno);
2218
2219               if (! CONST_CALL_P (insn))
2220                 record_last_mem_set_info (insn);
2221             }
2222
2223           note_stores (PATTERN (insn), record_last_set_info, insn);
2224         }
2225
2226       /* The next pass builds the hash table.  */
2227
2228       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2229            insn && insn != NEXT_INSN (BLOCK_END (bb));
2230            insn = NEXT_INSN (insn))
2231         if (INSN_P (insn))
2232           {
2233             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2234               in_libcall_block = 1;
2235             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2236               in_libcall_block = 0;
2237             hash_scan_insn (insn, set_p, in_libcall_block);
2238         }
2239     }
2240
2241   free (reg_first_set);
2242   free (reg_last_set);
2243
2244   /* Catch bugs early.  */
2245   reg_first_set = reg_last_set = 0;
2246 }
2247
2248 /* Allocate space for the set hash table.
2249    N_INSNS is the number of instructions in the function.
2250    It is used to determine the number of buckets to use.  */
2251
2252 static void
2253 alloc_set_hash_table (n_insns)
2254      int n_insns;
2255 {
2256   int n;
2257
2258   set_hash_table_size = n_insns / 4;
2259   if (set_hash_table_size < 11)
2260     set_hash_table_size = 11;
2261
2262   /* Attempt to maintain efficient use of hash table.
2263      Making it an odd number is simplest for now.
2264      ??? Later take some measurements.  */
2265   set_hash_table_size |= 1;
2266   n = set_hash_table_size * sizeof (struct expr *);
2267   set_hash_table = (struct expr **) gmalloc (n);
2268 }
2269
2270 /* Free things allocated by alloc_set_hash_table.  */
2271
2272 static void
2273 free_set_hash_table ()
2274 {
2275   free (set_hash_table);
2276 }
2277
2278 /* Compute the hash table for doing copy/const propagation.  */
2279
2280 static void
2281 compute_set_hash_table ()
2282 {
2283   /* Initialize count of number of entries in hash table.  */
2284   n_sets = 0;
2285   memset ((char *) set_hash_table, 0,
2286          set_hash_table_size * sizeof (struct expr *));
2287
2288   compute_hash_table (1);
2289 }
2290
2291 /* Allocate space for the expression hash table.
2292    N_INSNS is the number of instructions in the function.
2293    It is used to determine the number of buckets to use.  */
2294
2295 static void
2296 alloc_expr_hash_table (n_insns)
2297      unsigned int n_insns;
2298 {
2299   int n;
2300
2301   expr_hash_table_size = n_insns / 2;
2302   /* Make sure the amount is usable.  */
2303   if (expr_hash_table_size < 11)
2304     expr_hash_table_size = 11;
2305
2306   /* Attempt to maintain efficient use of hash table.
2307      Making it an odd number is simplest for now.
2308      ??? Later take some measurements.  */
2309   expr_hash_table_size |= 1;
2310   n = expr_hash_table_size * sizeof (struct expr *);
2311   expr_hash_table = (struct expr **) gmalloc (n);
2312 }
2313
2314 /* Free things allocated by alloc_expr_hash_table.  */
2315
2316 static void
2317 free_expr_hash_table ()
2318 {
2319   free (expr_hash_table);
2320 }
2321
2322 /* Compute the hash table for doing GCSE.  */
2323
2324 static void
2325 compute_expr_hash_table ()
2326 {
2327   /* Initialize count of number of entries in hash table.  */
2328   n_exprs = 0;
2329   memset ((char *) expr_hash_table, 0,
2330          expr_hash_table_size * sizeof (struct expr *));
2331
2332   compute_hash_table (0);
2333 }
2334 \f
2335 /* Expression tracking support.  */
2336
2337 /* Lookup pattern PAT in the expression table.
2338    The result is a pointer to the table entry, or NULL if not found.  */
2339
2340 static struct expr *
2341 lookup_expr (pat)
2342      rtx pat;
2343 {
2344   int do_not_record_p;
2345   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2346                                  expr_hash_table_size);
2347   struct expr *expr;
2348
2349   if (do_not_record_p)
2350     return NULL;
2351
2352   expr = expr_hash_table[hash];
2353
2354   while (expr && ! expr_equiv_p (expr->expr, pat))
2355     expr = expr->next_same_hash;
2356
2357   return expr;
2358 }
2359
2360 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2361    matches it, otherwise return the first entry for REGNO.  The result is a
2362    pointer to the table entry, or NULL if not found.  */
2363
2364 static struct expr *
2365 lookup_set (regno, pat)
2366      unsigned int regno;
2367      rtx pat;
2368 {
2369   unsigned int hash = hash_set (regno, set_hash_table_size);
2370   struct expr *expr;
2371
2372   expr = set_hash_table[hash];
2373
2374   if (pat)
2375     {
2376       while (expr && ! expr_equiv_p (expr->expr, pat))
2377         expr = expr->next_same_hash;
2378     }
2379   else
2380     {
2381       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2382         expr = expr->next_same_hash;
2383     }
2384
2385   return expr;
2386 }
2387
2388 /* Return the next entry for REGNO in list EXPR.  */
2389
2390 static struct expr *
2391 next_set (regno, expr)
2392      unsigned int regno;
2393      struct expr *expr;
2394 {
2395   do
2396     expr = expr->next_same_hash;
2397   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2398
2399   return expr;
2400 }
2401
2402 /* Reset tables used to keep track of what's still available [since the
2403    start of the block].  */
2404
2405 static void
2406 reset_opr_set_tables ()
2407 {
2408   /* Maintain a bitmap of which regs have been set since beginning of
2409      the block.  */
2410   sbitmap_zero (reg_set_bitmap);
2411
2412   /* Also keep a record of the last instruction to modify memory.
2413      For now this is very trivial, we only record whether any memory
2414      location has been modified.  */
2415   mem_last_set = 0;
2416 }
2417
2418 /* Return non-zero if the operands of X are not set before INSN in
2419    INSN's basic block.  */
2420
2421 static int
2422 oprs_not_set_p (x, insn)
2423      rtx x, insn;
2424 {
2425   int i, j;
2426   enum rtx_code code;
2427   const char *fmt;
2428
2429   if (x == 0)
2430     return 1;
2431
2432   code = GET_CODE (x);
2433   switch (code)
2434     {
2435     case PC:
2436     case CC0:
2437     case CONST:
2438     case CONST_INT:
2439     case CONST_DOUBLE:
2440     case SYMBOL_REF:
2441     case LABEL_REF:
2442     case ADDR_VEC:
2443     case ADDR_DIFF_VEC:
2444       return 1;
2445
2446     case MEM:
2447       if (mem_last_set != 0)
2448         return 0;
2449       else
2450         return oprs_not_set_p (XEXP (x, 0), insn);
2451
2452     case REG:
2453       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2454
2455     default:
2456       break;
2457     }
2458
2459   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2460     {
2461       if (fmt[i] == 'e')
2462         {
2463           /* If we are about to do the last recursive call
2464              needed at this level, change it into iteration.
2465              This function is called enough to be worth it.  */
2466           if (i == 0)
2467             return oprs_not_set_p (XEXP (x, i), insn);
2468
2469           if (! oprs_not_set_p (XEXP (x, i), insn))
2470             return 0;
2471         }
2472       else if (fmt[i] == 'E')
2473         for (j = 0; j < XVECLEN (x, i); j++)
2474           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2475             return 0;
2476     }
2477
2478   return 1;
2479 }
2480
2481 /* Mark things set by a CALL.  */
2482
2483 static void
2484 mark_call (insn)
2485      rtx insn;
2486 {
2487   mem_last_set = INSN_CUID (insn);
2488 }
2489
2490 /* Mark things set by a SET.  */
2491
2492 static void
2493 mark_set (pat, insn)
2494      rtx pat, insn;
2495 {
2496   rtx dest = SET_DEST (pat);
2497
2498   while (GET_CODE (dest) == SUBREG
2499          || GET_CODE (dest) == ZERO_EXTRACT
2500          || GET_CODE (dest) == SIGN_EXTRACT
2501          || GET_CODE (dest) == STRICT_LOW_PART)
2502     dest = XEXP (dest, 0);
2503
2504   if (GET_CODE (dest) == REG)
2505     SET_BIT (reg_set_bitmap, REGNO (dest));
2506   else if (GET_CODE (dest) == MEM)
2507     mem_last_set = INSN_CUID (insn);
2508
2509   if (GET_CODE (SET_SRC (pat)) == CALL)
2510     mark_call (insn);
2511 }
2512
2513 /* Record things set by a CLOBBER.  */
2514
2515 static void
2516 mark_clobber (pat, insn)
2517      rtx pat, insn;
2518 {
2519   rtx clob = XEXP (pat, 0);
2520
2521   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2522     clob = XEXP (clob, 0);
2523
2524   if (GET_CODE (clob) == REG)
2525     SET_BIT (reg_set_bitmap, REGNO (clob));
2526   else
2527     mem_last_set = INSN_CUID (insn);
2528 }
2529
2530 /* Record things set by INSN.
2531    This data is used by oprs_not_set_p.  */
2532
2533 static void
2534 mark_oprs_set (insn)
2535      rtx insn;
2536 {
2537   rtx pat = PATTERN (insn);
2538   int i;
2539
2540   if (GET_CODE (pat) == SET)
2541     mark_set (pat, insn);
2542   else if (GET_CODE (pat) == PARALLEL)
2543     for (i = 0; i < XVECLEN (pat, 0); i++)
2544       {
2545         rtx x = XVECEXP (pat, 0, i);
2546
2547         if (GET_CODE (x) == SET)
2548           mark_set (x, insn);
2549         else if (GET_CODE (x) == CLOBBER)
2550           mark_clobber (x, insn);
2551         else if (GET_CODE (x) == CALL)
2552           mark_call (insn);
2553       }
2554
2555   else if (GET_CODE (pat) == CLOBBER)
2556     mark_clobber (pat, insn);
2557   else if (GET_CODE (pat) == CALL)
2558     mark_call (insn);
2559 }
2560
2561 \f
2562 /* Classic GCSE reaching definition support.  */
2563
2564 /* Allocate reaching def variables.  */
2565
2566 static void
2567 alloc_rd_mem (n_blocks, n_insns)
2568      int n_blocks, n_insns;
2569 {
2570   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2571   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2572
2573   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2574   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2575
2576   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2577   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2578
2579   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2580   sbitmap_vector_zero (rd_out, n_basic_blocks);
2581 }
2582
2583 /* Free reaching def variables.  */
2584
2585 static void
2586 free_rd_mem ()
2587 {
2588   free (rd_kill);
2589   free (rd_gen);
2590   free (reaching_defs);
2591   free (rd_out);
2592 }
2593
2594 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2595
2596 static void
2597 handle_rd_kill_set (insn, regno, bb)
2598      rtx insn;
2599      int regno, bb;
2600 {
2601   struct reg_set *this_reg;
2602
2603   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2604     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2605       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2606 }
2607
2608 /* Compute the set of kill's for reaching definitions.  */
2609
2610 static void
2611 compute_kill_rd ()
2612 {
2613   int bb, cuid;
2614   int regno, i;
2615
2616   /* For each block
2617        For each set bit in `gen' of the block (i.e each insn which
2618            generates a definition in the block)
2619          Call the reg set by the insn corresponding to that bit regx
2620          Look at the linked list starting at reg_set_table[regx]
2621          For each setting of regx in the linked list, which is not in
2622              this block
2623            Set the bit in `kill' corresponding to that insn.   */
2624   for (bb = 0; bb < n_basic_blocks; bb++)
2625     for (cuid = 0; cuid < max_cuid; cuid++)
2626       if (TEST_BIT (rd_gen[bb], cuid))
2627         {
2628           rtx insn = CUID_INSN (cuid);
2629           rtx pat = PATTERN (insn);
2630
2631           if (GET_CODE (insn) == CALL_INSN)
2632             {
2633               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2634                 {
2635                   if ((call_used_regs[regno]
2636                        && regno != STACK_POINTER_REGNUM
2637 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2638                        && regno != HARD_FRAME_POINTER_REGNUM
2639 #endif
2640 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2641                        && ! (regno == ARG_POINTER_REGNUM
2642                              && fixed_regs[regno])
2643 #endif
2644 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2645                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2646 #endif
2647                        && regno != FRAME_POINTER_REGNUM)
2648                       || global_regs[regno])
2649                     handle_rd_kill_set (insn, regno, bb);
2650                 }
2651             }
2652
2653           if (GET_CODE (pat) == PARALLEL)
2654             {
2655               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2656                 {
2657                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2658
2659                   if ((code == SET || code == CLOBBER)
2660                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2661                     handle_rd_kill_set (insn,
2662                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2663                                         bb);
2664                 }
2665             }
2666           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2667             /* Each setting of this register outside of this block
2668                must be marked in the set of kills in this block.  */
2669             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2670         }
2671 }
2672
2673 /* Compute the reaching definitions as in
2674    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2675    Chapter 10.  It is the same algorithm as used for computing available
2676    expressions but applied to the gens and kills of reaching definitions.  */
2677
2678 static void
2679 compute_rd ()
2680 {
2681   int bb, changed, passes;
2682
2683   for (bb = 0; bb < n_basic_blocks; bb++)
2684     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2685
2686   passes = 0;
2687   changed = 1;
2688   while (changed)
2689     {
2690       changed = 0;
2691       for (bb = 0; bb < n_basic_blocks; bb++)
2692         {
2693           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2694           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2695                                             reaching_defs[bb], rd_kill[bb]);
2696         }
2697       passes++;
2698     }
2699
2700   if (gcse_file)
2701     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2702 }
2703 \f
2704 /* Classic GCSE available expression support.  */
2705
2706 /* Allocate memory for available expression computation.  */
2707
2708 static void
2709 alloc_avail_expr_mem (n_blocks, n_exprs)
2710      int n_blocks, n_exprs;
2711 {
2712   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2713   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2714
2715   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2716   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2717
2718   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2719   sbitmap_vector_zero (ae_in, n_basic_blocks);
2720
2721   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2722   sbitmap_vector_zero (ae_out, n_basic_blocks);
2723 }
2724
2725 static void
2726 free_avail_expr_mem ()
2727 {
2728   free (ae_kill);
2729   free (ae_gen);
2730   free (ae_in);
2731   free (ae_out);
2732 }
2733
2734 /* Compute the set of available expressions generated in each basic block.  */
2735
2736 static void
2737 compute_ae_gen ()
2738 {
2739   unsigned int i;
2740   struct expr *expr;
2741   struct occr *occr;
2742
2743   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2744      This is all we have to do because an expression is not recorded if it
2745      is not available, and the only expressions we want to work with are the
2746      ones that are recorded.  */
2747   for (i = 0; i < expr_hash_table_size; i++)
2748     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2749       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2750         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2751 }
2752
2753 /* Return non-zero if expression X is killed in BB.  */
2754
2755 static int
2756 expr_killed_p (x, bb)
2757      rtx x;
2758      int bb;
2759 {
2760   int i, j;
2761   enum rtx_code code;
2762   const char *fmt;
2763
2764   if (x == 0)
2765     return 1;
2766
2767   code = GET_CODE (x);
2768   switch (code)
2769     {
2770     case REG:
2771       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2772
2773     case MEM:
2774       if (mem_set_in_block[bb])
2775         return 1;
2776       else
2777         return expr_killed_p (XEXP (x, 0), bb);
2778
2779     case PC:
2780     case CC0: /*FIXME*/
2781     case CONST:
2782     case CONST_INT:
2783     case CONST_DOUBLE:
2784     case SYMBOL_REF:
2785     case LABEL_REF:
2786     case ADDR_VEC:
2787     case ADDR_DIFF_VEC:
2788       return 0;
2789
2790     default:
2791       break;
2792     }
2793
2794   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2795     {
2796       if (fmt[i] == 'e')
2797         {
2798           /* If we are about to do the last recursive call
2799              needed at this level, change it into iteration.
2800              This function is called enough to be worth it.  */
2801           if (i == 0)
2802             return expr_killed_p (XEXP (x, i), bb);
2803           else if (expr_killed_p (XEXP (x, i), bb))
2804             return 1;
2805         }
2806       else if (fmt[i] == 'E')
2807         for (j = 0; j < XVECLEN (x, i); j++)
2808           if (expr_killed_p (XVECEXP (x, i, j), bb))
2809             return 1;
2810     }
2811
2812   return 0;
2813 }
2814
2815 /* Compute the set of available expressions killed in each basic block.  */
2816
2817 static void
2818 compute_ae_kill (ae_gen, ae_kill)
2819      sbitmap *ae_gen, *ae_kill;
2820 {
2821   int bb;
2822   unsigned int i;
2823   struct expr *expr;
2824
2825   for (bb = 0; bb < n_basic_blocks; bb++)
2826     for (i = 0; i < expr_hash_table_size; i++)
2827       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2828         {
2829           /* Skip EXPR if generated in this block.  */
2830           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2831             continue;
2832
2833           if (expr_killed_p (expr->expr, bb))
2834             SET_BIT (ae_kill[bb], expr->bitmap_index);
2835         }
2836 }
2837 \f
2838 /* Actually perform the Classic GCSE optimizations.  */
2839
2840 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2841
2842    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2843    as a positive reach.  We want to do this when there are two computations
2844    of the expression in the block.
2845
2846    VISITED is a pointer to a working buffer for tracking which BB's have
2847    been visited.  It is NULL for the top-level call.
2848
2849    We treat reaching expressions that go through blocks containing the same
2850    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2851    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2852    2 as not reaching.  The intent is to improve the probability of finding
2853    only one reaching expression and to reduce register lifetimes by picking
2854    the closest such expression.  */
2855
2856 static int
2857 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2858      struct occr *occr;
2859      struct expr *expr;
2860      int bb;
2861      int check_self_loop;
2862      char *visited;
2863 {
2864   edge pred;
2865
2866   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2867     {
2868       int pred_bb = pred->src->index;
2869
2870       if (visited[pred_bb])
2871         /* This predecessor has already been visited. Nothing to do.  */
2872           ;
2873       else if (pred_bb == bb)
2874         {
2875           /* BB loops on itself.  */
2876           if (check_self_loop
2877               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2878               && BLOCK_NUM (occr->insn) == pred_bb)
2879             return 1;
2880
2881           visited[pred_bb] = 1;
2882         }
2883
2884       /* Ignore this predecessor if it kills the expression.  */
2885       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2886         visited[pred_bb] = 1;
2887
2888       /* Does this predecessor generate this expression?  */
2889       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2890         {
2891           /* Is this the occurrence we're looking for?
2892              Note that there's only one generating occurrence per block
2893              so we just need to check the block number.  */
2894           if (BLOCK_NUM (occr->insn) == pred_bb)
2895             return 1;
2896
2897           visited[pred_bb] = 1;
2898         }
2899
2900       /* Neither gen nor kill.  */
2901       else
2902         {
2903           visited[pred_bb] = 1;
2904           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2905               visited))
2906
2907             return 1;
2908         }
2909     }
2910
2911   /* All paths have been checked.  */
2912   return 0;
2913 }
2914
2915 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2916    memory allocated for that function is returned. */
2917
2918 static int
2919 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2920      struct occr *occr;
2921      struct expr *expr;
2922      int bb;
2923      int check_self_loop;
2924 {
2925   int rval;
2926   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2927
2928   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2929
2930   free (visited);
2931   return rval;
2932 }
2933
2934 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2935    If there is more than one such instruction, return NULL.
2936
2937    Called only by handle_avail_expr.  */
2938
2939 static rtx
2940 computing_insn (expr, insn)
2941      struct expr *expr;
2942      rtx insn;
2943 {
2944   int bb = BLOCK_NUM (insn);
2945
2946   if (expr->avail_occr->next == NULL)
2947     {
2948       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2949         /* The available expression is actually itself
2950            (i.e. a loop in the flow graph) so do nothing.  */
2951         return NULL;
2952
2953       /* (FIXME) Case that we found a pattern that was created by
2954          a substitution that took place.  */
2955       return expr->avail_occr->insn;
2956     }
2957   else
2958     {
2959       /* Pattern is computed more than once.
2960          Search backwards from this insn to see how many of these
2961          computations actually reach this insn.  */
2962       struct occr *occr;
2963       rtx insn_computes_expr = NULL;
2964       int can_reach = 0;
2965
2966       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2967         {
2968           if (BLOCK_NUM (occr->insn) == bb)
2969             {
2970               /* The expression is generated in this block.
2971                  The only time we care about this is when the expression
2972                  is generated later in the block [and thus there's a loop].
2973                  We let the normal cse pass handle the other cases.  */
2974               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2975                   && expr_reaches_here_p (occr, expr, bb, 1))
2976                 {
2977                   can_reach++;
2978                   if (can_reach > 1)
2979                     return NULL;
2980
2981                   insn_computes_expr = occr->insn;
2982                 }
2983             }
2984           else if (expr_reaches_here_p (occr, expr, bb, 0))
2985             {
2986               can_reach++;
2987               if (can_reach > 1)
2988                 return NULL;
2989
2990               insn_computes_expr = occr->insn;
2991             }
2992         }
2993
2994       if (insn_computes_expr == NULL)
2995         abort ();
2996
2997       return insn_computes_expr;
2998     }
2999 }
3000
3001 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3002    Only called by can_disregard_other_sets.  */
3003
3004 static int
3005 def_reaches_here_p (insn, def_insn)
3006      rtx insn, def_insn;
3007 {
3008   rtx reg;
3009
3010   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3011     return 1;
3012
3013   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3014     {
3015       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3016         {
3017           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3018             return 1;
3019           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3020             reg = XEXP (PATTERN (def_insn), 0);
3021           else if (GET_CODE (PATTERN (def_insn)) == SET)
3022             reg = SET_DEST (PATTERN (def_insn));
3023           else
3024             abort ();
3025
3026           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3027         }
3028       else
3029         return 0;
3030     }
3031
3032   return 0;
3033 }
3034
3035 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
3036    value returned is the number of definitions that reach INSN.  Returning a
3037    value of zero means that [maybe] more than one definition reaches INSN and
3038    the caller can't perform whatever optimization it is trying.  i.e. it is
3039    always safe to return zero.  */
3040
3041 static int
3042 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3043      struct reg_set **addr_this_reg;
3044      rtx insn;
3045      int for_combine;
3046 {
3047   int number_of_reaching_defs = 0;
3048   struct reg_set *this_reg;
3049
3050   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3051     if (def_reaches_here_p (insn, this_reg->insn))
3052       {
3053         number_of_reaching_defs++;
3054         /* Ignore parallels for now.  */
3055         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3056           return 0;
3057
3058         if (!for_combine
3059             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3060                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3061                                   SET_SRC (PATTERN (insn)))))
3062           /* A setting of the reg to a different value reaches INSN.  */
3063           return 0;
3064
3065         if (number_of_reaching_defs > 1)
3066           {
3067             /* If in this setting the value the register is being set to is
3068                equal to the previous value the register was set to and this
3069                setting reaches the insn we are trying to do the substitution
3070                on then we are ok.  */
3071             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3072               return 0;
3073             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3074                                     SET_SRC (PATTERN (insn))))
3075               return 0;
3076           }
3077
3078         *addr_this_reg = this_reg;
3079       }
3080
3081   return number_of_reaching_defs;
3082 }
3083
3084 /* Expression computed by insn is available and the substitution is legal,
3085    so try to perform the substitution.
3086
3087    The result is non-zero if any changes were made.  */
3088
3089 static int
3090 handle_avail_expr (insn, expr)
3091      rtx insn;
3092      struct expr *expr;
3093 {
3094   rtx pat, insn_computes_expr;
3095   rtx to;
3096   struct reg_set *this_reg;
3097   int found_setting, use_src;
3098   int changed = 0;
3099
3100   /* We only handle the case where one computation of the expression
3101      reaches this instruction.  */
3102   insn_computes_expr = computing_insn (expr, insn);
3103   if (insn_computes_expr == NULL)
3104     return 0;
3105
3106   found_setting = 0;
3107   use_src = 0;
3108
3109   /* At this point we know only one computation of EXPR outside of this
3110      block reaches this insn.  Now try to find a register that the
3111      expression is computed into.  */
3112   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3113     {
3114       /* This is the case when the available expression that reaches
3115          here has already been handled as an available expression.  */
3116       unsigned int regnum_for_replacing
3117         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3118
3119       /* If the register was created by GCSE we can't use `reg_set_table',
3120          however we know it's set only once.  */
3121       if (regnum_for_replacing >= max_gcse_regno
3122           /* If the register the expression is computed into is set only once,
3123              or only one set reaches this insn, we can use it.  */
3124           || (((this_reg = reg_set_table[regnum_for_replacing]),
3125                this_reg->next == NULL)
3126               || can_disregard_other_sets (&this_reg, insn, 0)))
3127        {
3128          use_src = 1;
3129          found_setting = 1;
3130        }
3131     }
3132
3133   if (!found_setting)
3134     {
3135       unsigned int regnum_for_replacing
3136         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3137
3138       /* This shouldn't happen.  */
3139       if (regnum_for_replacing >= max_gcse_regno)
3140         abort ();
3141
3142       this_reg = reg_set_table[regnum_for_replacing];
3143
3144       /* If the register the expression is computed into is set only once,
3145          or only one set reaches this insn, use it.  */
3146       if (this_reg->next == NULL
3147           || can_disregard_other_sets (&this_reg, insn, 0))
3148         found_setting = 1;
3149     }
3150
3151   if (found_setting)
3152     {
3153       pat = PATTERN (insn);
3154       if (use_src)
3155         to = SET_SRC (PATTERN (insn_computes_expr));
3156       else
3157         to = SET_DEST (PATTERN (insn_computes_expr));
3158       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3159
3160       /* We should be able to ignore the return code from validate_change but
3161          to play it safe we check.  */
3162       if (changed)
3163         {
3164           gcse_subst_count++;
3165           if (gcse_file != NULL)
3166             {
3167               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3168                        INSN_UID (insn));
3169               fprintf (gcse_file, " reg %d %s insn %d\n",
3170                        REGNO (to), use_src ? "from" : "set in",
3171                        INSN_UID (insn_computes_expr));
3172             }
3173         }
3174     }
3175
3176   /* The register that the expr is computed into is set more than once.  */
3177   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3178     {
3179       /* Insert an insn after insnx that copies the reg set in insnx
3180          into a new pseudo register call this new register REGN.
3181          From insnb until end of basic block or until REGB is set
3182          replace all uses of REGB with REGN.  */
3183       rtx new_insn;
3184
3185       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3186
3187       /* Generate the new insn.  */
3188       /* ??? If the change fails, we return 0, even though we created
3189          an insn.  I think this is ok.  */
3190       new_insn
3191         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3192                                         SET_DEST (PATTERN
3193                                                   (insn_computes_expr))),
3194                            insn_computes_expr);
3195
3196       /* Keep block number table up to date.  */
3197       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3198
3199       /* Keep register set table up to date.  */
3200       record_one_set (REGNO (to), new_insn);
3201
3202       gcse_create_count++;
3203       if (gcse_file != NULL)
3204         {
3205           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3206                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3207                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3208           fprintf (gcse_file, ", computed in insn %d,\n",
3209                    INSN_UID (insn_computes_expr));
3210           fprintf (gcse_file, "      into newly allocated reg %d\n",
3211                    REGNO (to));
3212         }
3213
3214       pat = PATTERN (insn);
3215
3216       /* Do register replacement for INSN.  */
3217       changed = validate_change (insn, &SET_SRC (pat),
3218                                  SET_DEST (PATTERN
3219                                            (NEXT_INSN (insn_computes_expr))),
3220                                  0);
3221
3222       /* We should be able to ignore the return code from validate_change but
3223          to play it safe we check.  */
3224       if (changed)
3225         {
3226           gcse_subst_count++;
3227           if (gcse_file != NULL)
3228             {
3229               fprintf (gcse_file,
3230                        "GCSE: Replacing the source in insn %d with reg %d ",
3231                        INSN_UID (insn),
3232                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3233                                                  (insn_computes_expr)))));
3234               fprintf (gcse_file, "set in insn %d\n",
3235                        INSN_UID (insn_computes_expr));
3236             }
3237         }
3238     }
3239
3240   return changed;
3241 }
3242
3243 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3244    the dataflow analysis has been done.
3245
3246    The result is non-zero if a change was made.  */
3247
3248 static int
3249 classic_gcse ()
3250 {
3251   int bb, changed;
3252   rtx insn;
3253
3254   /* Note we start at block 1.  */
3255
3256   changed = 0;
3257   for (bb = 1; bb < n_basic_blocks; bb++)
3258     {
3259       /* Reset tables used to keep track of what's still valid [since the
3260          start of the block].  */
3261       reset_opr_set_tables ();
3262
3263       for (insn = BLOCK_HEAD (bb);
3264            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3265            insn = NEXT_INSN (insn))
3266         {
3267           /* Is insn of form (set (pseudo-reg) ...)?  */
3268           if (GET_CODE (insn) == INSN
3269               && GET_CODE (PATTERN (insn)) == SET
3270               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3271               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3272             {
3273               rtx pat = PATTERN (insn);
3274               rtx src = SET_SRC (pat);
3275               struct expr *expr;
3276
3277               if (want_to_gcse_p (src)
3278                   /* Is the expression recorded?  */
3279                   && ((expr = lookup_expr (src)) != NULL)
3280                   /* Is the expression available [at the start of the
3281                      block]?  */
3282                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3283                   /* Are the operands unchanged since the start of the
3284                      block?  */
3285                   && oprs_not_set_p (src, insn))
3286                 changed |= handle_avail_expr (insn, expr);
3287             }
3288
3289           /* Keep track of everything modified by this insn.  */
3290           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3291           if (INSN_P (insn))
3292             mark_oprs_set (insn);
3293         }
3294     }
3295
3296   return changed;
3297 }
3298
3299 /* Top level routine to perform one classic GCSE pass.
3300
3301    Return non-zero if a change was made.  */
3302
3303 static int
3304 one_classic_gcse_pass (pass)
3305      int pass;
3306 {
3307   int changed = 0;
3308
3309   gcse_subst_count = 0;
3310   gcse_create_count = 0;
3311
3312   alloc_expr_hash_table (max_cuid);
3313   alloc_rd_mem (n_basic_blocks, max_cuid);
3314   compute_expr_hash_table ();
3315   if (gcse_file)
3316     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3317                      expr_hash_table_size, n_exprs);
3318
3319   if (n_exprs > 0)
3320     {
3321       compute_kill_rd ();
3322       compute_rd ();
3323       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3324       compute_ae_gen ();
3325       compute_ae_kill (ae_gen, ae_kill);
3326       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3327       changed = classic_gcse ();
3328       free_avail_expr_mem ();
3329     }
3330
3331   free_rd_mem ();
3332   free_expr_hash_table ();
3333
3334   if (gcse_file)
3335     {
3336       fprintf (gcse_file, "\n");
3337       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3338                current_function_name, pass, bytes_used, gcse_subst_count);
3339       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3340     }
3341
3342   return changed;
3343 }
3344 \f
3345 /* Compute copy/constant propagation working variables.  */
3346
3347 /* Local properties of assignments.  */
3348 static sbitmap *cprop_pavloc;
3349 static sbitmap *cprop_absaltered;
3350
3351 /* Global properties of assignments (computed from the local properties).  */
3352 static sbitmap *cprop_avin;
3353 static sbitmap *cprop_avout;
3354
3355 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3356    basic blocks.  N_SETS is the number of sets.  */
3357
3358 static void
3359 alloc_cprop_mem (n_blocks, n_sets)
3360      int n_blocks, n_sets;
3361 {
3362   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3363   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3364
3365   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3366   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3367 }
3368
3369 /* Free vars used by copy/const propagation.  */
3370
3371 static void
3372 free_cprop_mem ()
3373 {
3374   free (cprop_pavloc);
3375   free (cprop_absaltered);
3376   free (cprop_avin);
3377   free (cprop_avout);
3378 }
3379
3380 /* For each block, compute whether X is transparent.  X is either an
3381    expression or an assignment [though we don't care which, for this context
3382    an assignment is treated as an expression].  For each block where an
3383    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3384    bit in BMAP.  */
3385
3386 static void
3387 compute_transp (x, indx, bmap, set_p)
3388      rtx x;
3389      int indx;
3390      sbitmap *bmap;
3391      int set_p;
3392 {
3393   int bb, i, j;
3394   enum rtx_code code;
3395   reg_set *r;
3396   const char *fmt;
3397
3398   /* repeat is used to turn tail-recursion into iteration since GCC
3399      can't do it when there's no return value.  */
3400  repeat:
3401
3402   if (x == 0)
3403     return;
3404
3405   code = GET_CODE (x);
3406   switch (code)
3407     {
3408     case REG:
3409       if (set_p)
3410         {
3411           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3412             {
3413               for (bb = 0; bb < n_basic_blocks; bb++)
3414                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3415                   SET_BIT (bmap[bb], indx);
3416             }
3417           else
3418             {
3419               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3420                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3421             }
3422         }
3423       else
3424         {
3425           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3426             {
3427               for (bb = 0; bb < n_basic_blocks; bb++)
3428                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3429                   RESET_BIT (bmap[bb], indx);
3430             }
3431           else
3432             {
3433               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3434                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3435             }
3436         }
3437
3438       return;
3439
3440     case MEM:
3441       if (set_p)
3442         {
3443           for (bb = 0; bb < n_basic_blocks; bb++)
3444             if (mem_set_in_block[bb])
3445               SET_BIT (bmap[bb], indx);
3446         }
3447       else
3448         {
3449           for (bb = 0; bb < n_basic_blocks; bb++)
3450             if (mem_set_in_block[bb])
3451               RESET_BIT (bmap[bb], indx);
3452         }
3453
3454       x = XEXP (x, 0);
3455       goto repeat;
3456
3457     case PC:
3458     case CC0: /*FIXME*/
3459     case CONST:
3460     case CONST_INT:
3461     case CONST_DOUBLE:
3462     case SYMBOL_REF:
3463     case LABEL_REF:
3464     case ADDR_VEC:
3465     case ADDR_DIFF_VEC:
3466       return;
3467
3468     default:
3469       break;
3470     }
3471
3472   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3473     {
3474       if (fmt[i] == 'e')
3475         {
3476           /* If we are about to do the last recursive call
3477              needed at this level, change it into iteration.
3478              This function is called enough to be worth it.  */
3479           if (i == 0)
3480             {
3481               x = XEXP (x, i);
3482               goto repeat;
3483             }
3484
3485           compute_transp (XEXP (x, i), indx, bmap, set_p);
3486         }
3487       else if (fmt[i] == 'E')
3488         for (j = 0; j < XVECLEN (x, i); j++)
3489           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3490     }
3491 }
3492
3493 /* Top level routine to do the dataflow analysis needed by copy/const
3494    propagation.  */
3495
3496 static void
3497 compute_cprop_data ()
3498 {
3499   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3500   compute_available (cprop_pavloc, cprop_absaltered,
3501                      cprop_avout, cprop_avin);
3502 }
3503 \f
3504 /* Copy/constant propagation.  */
3505
3506 /* Maximum number of register uses in an insn that we handle.  */
3507 #define MAX_USES 8
3508
3509 /* Table of uses found in an insn.
3510    Allocated statically to avoid alloc/free complexity and overhead.  */
3511 static struct reg_use reg_use_table[MAX_USES];
3512
3513 /* Index into `reg_use_table' while building it.  */
3514 static int reg_use_count;
3515
3516 /* Set up a list of register numbers used in INSN.  The found uses are stored
3517    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3518    and contains the number of uses in the table upon exit.
3519
3520    ??? If a register appears multiple times we will record it multiple times.
3521    This doesn't hurt anything but it will slow things down.  */
3522
3523 static void
3524 find_used_regs (x)
3525      rtx x;
3526 {
3527   int i, j;
3528   enum rtx_code code;
3529   const char *fmt;
3530
3531   /* repeat is used to turn tail-recursion into iteration since GCC
3532      can't do it when there's no return value.  */
3533  repeat:
3534
3535   if (x == 0)
3536     return;
3537
3538   code = GET_CODE (x);
3539   switch (code)
3540     {
3541     case REG:
3542       if (reg_use_count == MAX_USES)
3543         return;
3544
3545       reg_use_table[reg_use_count].reg_rtx = x;
3546       reg_use_count++;
3547       return;
3548
3549     case MEM:
3550       x = XEXP (x, 0);
3551       goto repeat;
3552
3553     case PC:
3554     case CC0:
3555     case CONST:
3556     case CONST_INT:
3557     case CONST_DOUBLE:
3558     case SYMBOL_REF:
3559     case LABEL_REF:
3560     case CLOBBER:
3561     case ADDR_VEC:
3562     case ADDR_DIFF_VEC:
3563     case ASM_INPUT: /*FIXME*/
3564       return;
3565
3566     case SET:
3567       if (GET_CODE (SET_DEST (x)) == MEM)
3568         find_used_regs (SET_DEST (x));
3569       x = SET_SRC (x);
3570       goto repeat;
3571
3572     default:
3573       break;
3574     }
3575
3576   /* Recursively scan the operands of this expression.  */
3577
3578   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3579     {
3580       if (fmt[i] == 'e')
3581         {
3582           /* If we are about to do the last recursive call
3583              needed at this level, change it into iteration.
3584              This function is called enough to be worth it.  */
3585           if (i == 0)
3586             {
3587               x = XEXP (x, 0);
3588               goto repeat;
3589             }
3590
3591           find_used_regs (XEXP (x, i));
3592         }
3593       else if (fmt[i] == 'E')
3594         for (j = 0; j < XVECLEN (x, i); j++)
3595           find_used_regs (XVECEXP (x, i, j));
3596     }
3597 }
3598
3599 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3600    Returns non-zero is successful.  */
3601
3602 static int
3603 try_replace_reg (from, to, insn)
3604      rtx from, to, insn;
3605 {
3606   rtx note;
3607   rtx src;
3608   int success;
3609   rtx set;
3610
3611   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3612
3613   if (!note)
3614     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3615
3616   /* If this fails we could try to simplify the result of the
3617      replacement and attempt to recognize the simplified insn.
3618
3619      But we need a general simplify_rtx that doesn't have pass
3620      specific state variables.  I'm not aware of one at the moment.  */
3621
3622   success = validate_replace_src (from, to, insn);
3623   set = single_set (insn);
3624
3625   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3626      information.  */
3627   if (!success && !note)
3628     {
3629       if (!set)
3630         return 0;
3631
3632       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3633                                                    copy_rtx (SET_SRC (set)),
3634                                                    REG_NOTES (insn));
3635     }
3636
3637   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3638      try to simplify them.  */
3639   if (note)
3640     {
3641       rtx simplified;
3642
3643       if (!validate_replace_rtx_subexp (from, to, insn, &XEXP (note, 0)))
3644         abort();
3645
3646       src = XEXP (note, 0);
3647
3648       /* Try to simplify resulting note. */
3649       simplified = simplify_rtx (src);
3650       if (simplified)
3651         {
3652           src = simplified;
3653           XEXP (note, 0) = src;
3654         }
3655
3656       /* REG_EQUAL may get simplified into register.
3657          We don't allow that. Remove that note. This code ought
3658          not to hapen, because previous code ought to syntetize
3659          reg-reg move, but be on the safe side.  */
3660       else if (REG_P (src))
3661         remove_note (insn, note);
3662     }
3663   return success;
3664 }
3665
3666 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3667    NULL no such set is found.  */
3668
3669 static struct expr *
3670 find_avail_set (regno, insn)
3671      int regno;
3672      rtx insn;
3673 {
3674   /* SET1 contains the last set found that can be returned to the caller for
3675      use in a substitution.  */
3676   struct expr *set1 = 0;
3677
3678   /* Loops are not possible here.  To get a loop we would need two sets
3679      available at the start of the block containing INSN.  ie we would
3680      need two sets like this available at the start of the block:
3681
3682        (set (reg X) (reg Y))
3683        (set (reg Y) (reg X))
3684
3685      This can not happen since the set of (reg Y) would have killed the
3686      set of (reg X) making it unavailable at the start of this block.  */
3687   while (1)
3688      {
3689       rtx src;
3690       struct expr *set = lookup_set (regno, NULL_RTX);
3691
3692       /* Find a set that is available at the start of the block
3693          which contains INSN.  */
3694       while (set)
3695         {
3696           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3697             break;
3698           set = next_set (regno, set);
3699         }
3700
3701       /* If no available set was found we've reached the end of the
3702          (possibly empty) copy chain.  */
3703       if (set == 0)
3704         break;
3705
3706       if (GET_CODE (set->expr) != SET)
3707         abort ();
3708
3709       src = SET_SRC (set->expr);
3710
3711       /* We know the set is available.
3712          Now check that SRC is ANTLOC (i.e. none of the source operands
3713          have changed since the start of the block).
3714
3715          If the source operand changed, we may still use it for the next
3716          iteration of this loop, but we may not use it for substitutions.  */
3717
3718       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3719         set1 = set;
3720
3721       /* If the source of the set is anything except a register, then
3722          we have reached the end of the copy chain.  */
3723       if (GET_CODE (src) != REG)
3724         break;
3725
3726       /* Follow the copy chain, ie start another iteration of the loop
3727          and see if we have an available copy into SRC.  */
3728       regno = REGNO (src);
3729      }
3730
3731   /* SET1 holds the last set that was available and anticipatable at
3732      INSN.  */
3733   return set1;
3734 }
3735
3736 /* Subroutine of cprop_insn that tries to propagate constants into
3737    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3738    that we can use for substitutions.
3739    REG_USED is the use we will try to replace, SRC is the constant we
3740    will try to substitute for it.
3741    Returns nonzero if a change was made.  */
3742
3743 static int
3744 cprop_jump (insn, copy, reg_used, src)
3745      rtx insn, copy;
3746      struct reg_use *reg_used;
3747      rtx src;
3748 {
3749   rtx set = PATTERN (copy);
3750   rtx temp;
3751
3752   /* Replace the register with the appropriate constant.  */
3753   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3754
3755   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3756                                      GET_MODE (SET_SRC (set)),
3757                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3758                                      XEXP (SET_SRC (set), 0),
3759                                      XEXP (SET_SRC (set), 1),
3760                                      XEXP (SET_SRC (set), 2));
3761
3762   /* If no simplification can be made, then try the next
3763      register.  */
3764   if (temp == 0)
3765     return 0;
3766
3767   SET_SRC (set) = temp;
3768
3769   /* That may have changed the structure of TEMP, so
3770      force it to be rerecognized if it has not turned
3771      into a nop or unconditional jump.  */
3772
3773   INSN_CODE (copy) = -1;
3774   if ((SET_DEST (set) == pc_rtx
3775        && (SET_SRC (set) == pc_rtx
3776            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3777       || recog (PATTERN (copy), copy, NULL) >= 0)
3778     {
3779       /* This has either become an unconditional jump
3780          or a nop-jump.  We'd like to delete nop jumps
3781          here, but doing so confuses gcse.  So we just
3782          make the replacement and let later passes
3783          sort things out.  */
3784       PATTERN (insn) = set;
3785       INSN_CODE (insn) = -1;
3786
3787       /* One less use of the label this insn used to jump to
3788          if we turned this into a NOP jump.  */
3789       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3790         --LABEL_NUSES (JUMP_LABEL (insn));
3791
3792       /* If this has turned into an unconditional jump,
3793          then put a barrier after it so that the unreachable
3794          code will be deleted.  */
3795       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3796         emit_barrier_after (insn);
3797
3798       run_jump_opt_after_gcse = 1;
3799
3800       const_prop_count++;
3801       if (gcse_file != NULL)
3802         {
3803           fprintf (gcse_file,
3804                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3805                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3806           print_rtl (gcse_file, src);
3807           fprintf (gcse_file, "\n");
3808         }
3809
3810       return 1;
3811     }
3812   return 0;
3813 }
3814
3815 #ifdef HAVE_cc0
3816
3817 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3818    for machines that have CC0.  INSN is a single set that stores into CC0;
3819    the insn following it is a conditional jump.  REG_USED is the use we will
3820    try to replace, SRC is the constant we will try to substitute for it.
3821    Returns nonzero if a change was made.  */
3822
3823 static int
3824 cprop_cc0_jump (insn, reg_used, src)
3825      rtx insn;
3826      struct reg_use *reg_used;
3827      rtx src;
3828 {
3829   rtx jump = NEXT_INSN (insn);
3830   rtx copy = copy_rtx (jump);
3831   rtx set = PATTERN (copy);
3832
3833   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3834      substitute into it.  */
3835   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3836   if (! cprop_jump (jump, copy, reg_used, src))
3837     return 0;
3838
3839   /* If we succeeded, delete the cc0 setter.  */
3840   PUT_CODE (insn, NOTE);
3841   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3842   NOTE_SOURCE_FILE (insn) = 0;
3843   return 1;
3844  }
3845 #endif
3846
3847 /* Perform constant and copy propagation on INSN.
3848    The result is non-zero if a change was made.  */
3849
3850 static int
3851 cprop_insn (insn, alter_jumps)
3852      rtx insn;
3853      int alter_jumps;
3854 {
3855   struct reg_use *reg_used;
3856   int changed = 0;
3857   rtx note;
3858
3859   /* Only propagate into SETs.  Note that a conditional jump is a
3860      SET with pc_rtx as the destination.  */
3861   if ((GET_CODE (insn) != INSN
3862        && GET_CODE (insn) != JUMP_INSN)
3863       || GET_CODE (PATTERN (insn)) != SET)
3864     return 0;
3865
3866   reg_use_count = 0;
3867   find_used_regs (PATTERN (insn));
3868
3869   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3870   if (!note)
3871     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3872
3873   /* We may win even when propagating constants into notes. */
3874   if (note)
3875     find_used_regs (XEXP (note, 0));
3876
3877   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3878        reg_used++, reg_use_count--)
3879     {
3880       unsigned int regno = REGNO (reg_used->reg_rtx);
3881       rtx pat, src;
3882       struct expr *set;
3883
3884       /* Ignore registers created by GCSE.
3885          We do this because ... */
3886       if (regno >= max_gcse_regno)
3887         continue;
3888
3889       /* If the register has already been set in this block, there's
3890          nothing we can do.  */
3891       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3892         continue;
3893
3894       /* Find an assignment that sets reg_used and is available
3895          at the start of the block.  */
3896       set = find_avail_set (regno, insn);
3897       if (! set)
3898         continue;
3899
3900       pat = set->expr;
3901       /* ??? We might be able to handle PARALLELs.  Later.  */
3902       if (GET_CODE (pat) != SET)
3903         abort ();
3904
3905       src = SET_SRC (pat);
3906
3907       /* Constant propagation.  */
3908       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3909           || GET_CODE (src) == SYMBOL_REF)
3910         {
3911           /* Handle normal insns first.  */
3912           if (GET_CODE (insn) == INSN
3913               && try_replace_reg (reg_used->reg_rtx, src, insn))
3914             {
3915               changed = 1;
3916               const_prop_count++;
3917               if (gcse_file != NULL)
3918                 {
3919                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3920                            regno);
3921                   fprintf (gcse_file, "insn %d with constant ",
3922                            INSN_UID (insn));
3923                   print_rtl (gcse_file, src);
3924                   fprintf (gcse_file, "\n");
3925                 }
3926
3927               /* The original insn setting reg_used may or may not now be
3928                  deletable.  We leave the deletion to flow.  */
3929             }
3930
3931           /* Try to propagate a CONST_INT into a conditional jump.
3932              We're pretty specific about what we will handle in this
3933              code, we can extend this as necessary over time.
3934
3935              Right now the insn in question must look like
3936              (set (pc) (if_then_else ...))  */
3937           else if (alter_jumps
3938                    && GET_CODE (insn) == JUMP_INSN
3939                    && condjump_p (insn)
3940                    && ! simplejump_p (insn))
3941             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3942 #ifdef HAVE_cc0
3943           /* Similar code for machines that use a pair of CC0 setter and
3944              conditional jump insn.  */
3945           else if (alter_jumps
3946                    && GET_CODE (PATTERN (insn)) == SET
3947                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3948                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3949                    && condjump_p (NEXT_INSN (insn))
3950                    && ! simplejump_p (NEXT_INSN (insn)))
3951             {
3952               if (cprop_cc0_jump (insn, reg_used, src))
3953                 {
3954                   changed = 1;
3955                   break;
3956                 }
3957             }
3958 #endif
3959         }
3960       else if (GET_CODE (src) == REG
3961                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3962                && REGNO (src) != regno)
3963         {
3964           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3965             {
3966               changed = 1;
3967               copy_prop_count++;
3968               if (gcse_file != NULL)
3969                 {
3970                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3971                            regno, INSN_UID (insn));
3972                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3973                 }
3974
3975               /* The original insn setting reg_used may or may not now be
3976                  deletable.  We leave the deletion to flow.  */
3977               /* FIXME: If it turns out that the insn isn't deletable,
3978                  then we may have unnecessarily extended register lifetimes
3979                  and made things worse.  */
3980             }
3981         }
3982     }
3983
3984   return changed;
3985 }
3986
3987 /* Forward propagate copies.  This includes copies and constants.  Return
3988    non-zero if a change was made.  */
3989
3990 static int
3991 cprop (alter_jumps)
3992      int alter_jumps;
3993 {
3994   int bb, changed;
3995   rtx insn;
3996
3997   /* Note we start at block 1.  */
3998
3999   changed = 0;
4000   for (bb = 1; bb < n_basic_blocks; bb++)
4001     {
4002       /* Reset tables used to keep track of what's still valid [since the
4003          start of the block].  */
4004       reset_opr_set_tables ();
4005
4006       for (insn = BLOCK_HEAD (bb);
4007            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4008            insn = NEXT_INSN (insn))
4009         {
4010           if (INSN_P (insn))
4011             {
4012               changed |= cprop_insn (insn, alter_jumps);
4013
4014               /* Keep track of everything modified by this insn.  */
4015               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4016                  call mark_oprs_set if we turned the insn into a NOTE.  */
4017               if (GET_CODE (insn) != NOTE)
4018                 mark_oprs_set (insn);
4019             }
4020         }
4021     }
4022
4023   if (gcse_file != NULL)
4024     fprintf (gcse_file, "\n");
4025
4026   return changed;
4027 }
4028
4029 /* Perform one copy/constant propagation pass.
4030    F is the first insn in the function.
4031    PASS is the pass count.  */
4032
4033 static int
4034 one_cprop_pass (pass, alter_jumps)
4035      int pass;
4036      int alter_jumps;
4037 {
4038   int changed = 0;
4039
4040   const_prop_count = 0;
4041   copy_prop_count = 0;
4042
4043   alloc_set_hash_table (max_cuid);
4044   compute_set_hash_table ();
4045   if (gcse_file)
4046     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4047                      n_sets);
4048   if (n_sets > 0)
4049     {
4050       alloc_cprop_mem (n_basic_blocks, n_sets);
4051       compute_cprop_data ();
4052       changed = cprop (alter_jumps);
4053       free_cprop_mem ();
4054     }
4055
4056   free_set_hash_table ();
4057
4058   if (gcse_file)
4059     {
4060       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4061                current_function_name, pass, bytes_used);
4062       fprintf (gcse_file, "%d const props, %d copy props\n\n",
4063                const_prop_count, copy_prop_count);
4064     }
4065
4066   return changed;
4067 }
4068 \f
4069 /* Compute PRE+LCM working variables.  */
4070
4071 /* Local properties of expressions.  */
4072 /* Nonzero for expressions that are transparent in the block.  */
4073 static sbitmap *transp;
4074
4075 /* Nonzero for expressions that are transparent at the end of the block.
4076    This is only zero for expressions killed by abnormal critical edge
4077    created by a calls.  */
4078 static sbitmap *transpout;
4079
4080 /* Nonzero for expressions that are computed (available) in the block.  */
4081 static sbitmap *comp;
4082
4083 /* Nonzero for expressions that are locally anticipatable in the block.  */
4084 static sbitmap *antloc;
4085
4086 /* Nonzero for expressions where this block is an optimal computation
4087    point.  */
4088 static sbitmap *pre_optimal;
4089
4090 /* Nonzero for expressions which are redundant in a particular block.  */
4091 static sbitmap *pre_redundant;
4092
4093 /* Nonzero for expressions which should be inserted on a specific edge.  */
4094 static sbitmap *pre_insert_map;
4095
4096 /* Nonzero for expressions which should be deleted in a specific block.  */
4097 static sbitmap *pre_delete_map;
4098
4099 /* Contains the edge_list returned by pre_edge_lcm.  */
4100 static struct edge_list *edge_list;
4101
4102 /* Redundant insns.  */
4103 static sbitmap pre_redundant_insns;
4104
4105 /* Allocate vars used for PRE analysis.  */
4106
4107 static void
4108 alloc_pre_mem (n_blocks, n_exprs)
4109      int n_blocks, n_exprs;
4110 {
4111   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4112   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4113   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4114
4115   pre_optimal = NULL;
4116   pre_redundant = NULL;
4117   pre_insert_map = NULL;
4118   pre_delete_map = NULL;
4119   ae_in = NULL;
4120   ae_out = NULL;
4121   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4122
4123   /* pre_insert and pre_delete are allocated later.  */
4124 }
4125
4126 /* Free vars used for PRE analysis.  */
4127
4128 static void
4129 free_pre_mem ()
4130 {
4131   free (transp);
4132   free (comp);
4133
4134   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
4135
4136   if (pre_optimal)
4137     free (pre_optimal);
4138   if (pre_redundant)
4139     free (pre_redundant);
4140   if (pre_insert_map)
4141     free (pre_insert_map);
4142   if (pre_delete_map)
4143     free (pre_delete_map);
4144
4145   if (ae_in)
4146     free (ae_in);
4147   if (ae_out)
4148     free (ae_out);
4149
4150   transp = comp = NULL;
4151   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4152   ae_in = ae_out = NULL;
4153 }
4154
4155 /* Top level routine to do the dataflow analysis needed by PRE.  */
4156
4157 static void
4158 compute_pre_data ()
4159 {
4160   sbitmap trapping_expr;
4161   int i;
4162   unsigned int ui;
4163
4164   compute_local_properties (transp, comp, antloc, 0);
4165   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4166
4167   /* Collect expressions which might trap.  */
4168   trapping_expr = sbitmap_alloc (n_exprs);
4169   sbitmap_zero (trapping_expr);
4170   for (ui = 0; ui < expr_hash_table_size; ui++)
4171     {
4172       struct expr *e;
4173       for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
4174         if (may_trap_p (e->expr))
4175           SET_BIT (trapping_expr, e->bitmap_index);
4176     }
4177
4178   /* Compute ae_kill for each basic block using:
4179
4180      ~(TRANSP | COMP)
4181
4182      This is significantly faster than compute_ae_kill.  */
4183
4184   for (i = 0; i < n_basic_blocks; i++)
4185     {
4186       edge e;
4187
4188       /* If the current block is the destination of an abnormal edge, we
4189          kill all trapping expressions because we won't be able to properly
4190          place the instruction on the edge.  So make them neither
4191          anticipatable nor transparent.  This is fairly conservative.  */
4192       for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
4193         if (e->flags & EDGE_ABNORMAL)
4194           {
4195             sbitmap_difference (antloc[i], antloc[i], trapping_expr);
4196             sbitmap_difference (transp[i], transp[i], trapping_expr);
4197             break;
4198           }
4199
4200       sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4201       sbitmap_not (ae_kill[i], ae_kill[i]);
4202     }
4203
4204   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4205                             ae_kill, &pre_insert_map, &pre_delete_map);
4206   free (antloc);
4207   antloc = NULL;
4208   free (ae_kill);
4209   ae_kill = NULL;
4210   free (trapping_expr);
4211 }
4212 \f
4213 /* PRE utilities */
4214
4215 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4216    block BB.
4217
4218    VISITED is a pointer to a working buffer for tracking which BB's have
4219    been visited.  It is NULL for the top-level call.
4220
4221    We treat reaching expressions that go through blocks containing the same
4222    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4223    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4224    2 as not reaching.  The intent is to improve the probability of finding
4225    only one reaching expression and to reduce register lifetimes by picking
4226    the closest such expression.  */
4227
4228 static int
4229 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4230      int occr_bb;
4231      struct expr *expr;
4232      int bb;
4233      char *visited;
4234 {
4235   edge pred;
4236
4237   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4238     {
4239       int pred_bb = pred->src->index;
4240
4241       if (pred->src == ENTRY_BLOCK_PTR
4242           /* Has predecessor has already been visited?  */
4243           || visited[pred_bb])
4244         ;/* Nothing to do.  */
4245
4246       /* Does this predecessor generate this expression?  */
4247       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4248         {
4249           /* Is this the occurrence we're looking for?
4250              Note that there's only one generating occurrence per block
4251              so we just need to check the block number.  */
4252           if (occr_bb == pred_bb)
4253             return 1;
4254
4255           visited[pred_bb] = 1;
4256         }
4257       /* Ignore this predecessor if it kills the expression.  */
4258       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4259         visited[pred_bb] = 1;
4260
4261       /* Neither gen nor kill.  */
4262       else
4263         {
4264           visited[pred_bb] = 1;
4265           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4266             return 1;
4267         }
4268     }
4269
4270   /* All paths have been checked.  */
4271   return 0;
4272 }
4273
4274 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4275    memory allocated for that function is returned. */
4276
4277 static int
4278 pre_expr_reaches_here_p (occr_bb, expr, bb)
4279      int occr_bb;
4280      struct expr *expr;
4281      int bb;
4282 {
4283   int rval;
4284   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4285
4286   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4287
4288   free (visited);
4289   return rval;
4290 }
4291 \f
4292
4293 /* Given an expr, generate RTL which we can insert at the end of a BB,
4294    or on an edge.  Set the block number of any insns generated to
4295    the value of BB.  */
4296
4297 static rtx
4298 process_insert_insn (expr)
4299      struct expr *expr;
4300 {
4301   rtx reg = expr->reaching_reg;
4302   rtx pat, copied_expr;
4303   rtx first_new_insn;
4304
4305   start_sequence ();
4306   copied_expr = copy_rtx (expr->expr);
4307   emit_move_insn (reg, copied_expr);
4308   first_new_insn = get_insns ();
4309   pat = gen_sequence ();
4310   end_sequence ();
4311
4312   return pat;
4313 }
4314
4315 /* Add EXPR to the end of basic block BB.
4316
4317    This is used by both the PRE and code hoisting.
4318
4319    For PRE, we want to verify that the expr is either transparent
4320    or locally anticipatable in the target block.  This check makes
4321    no sense for code hoisting.  */
4322
4323 static void
4324 insert_insn_end_bb (expr, bb, pre)
4325      struct expr *expr;
4326      int bb;
4327      int pre;
4328 {
4329   rtx insn = BLOCK_END (bb);
4330   rtx new_insn;
4331   rtx reg = expr->reaching_reg;
4332   int regno = REGNO (reg);
4333   rtx pat;
4334   int i;
4335
4336   pat = process_insert_insn (expr);
4337
4338   /* If the last insn is a jump, insert EXPR in front [taking care to
4339      handle cc0, etc. properly].  */
4340
4341   if (GET_CODE (insn) == JUMP_INSN)
4342     {
4343 #ifdef HAVE_cc0
4344       rtx note;
4345 #endif
4346
4347       /* If this is a jump table, then we can't insert stuff here.  Since
4348          we know the previous real insn must be the tablejump, we insert
4349          the new instruction just before the tablejump.  */
4350       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4351           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4352         insn = prev_real_insn (insn);
4353
4354 #ifdef HAVE_cc0
4355       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4356          if cc0 isn't set.  */
4357       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4358       if (note)
4359         insn = XEXP (note, 0);
4360       else
4361         {
4362           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4363           if (maybe_cc0_setter
4364               && INSN_P (maybe_cc0_setter)
4365               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4366             insn = maybe_cc0_setter;
4367         }
4368 #endif
4369       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4370       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4371     }
4372
4373   /* Likewise if the last insn is a call, as will happen in the presence
4374      of exception handling.  */
4375   else if (GET_CODE (insn) == CALL_INSN)
4376     {
4377       HARD_REG_SET parm_regs;
4378       int nparm_regs;
4379       rtx p;
4380
4381       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4382          we search backward and place the instructions before the first
4383          parameter is loaded.  Do this for everyone for consistency and a
4384          presumtion that we'll get better code elsewhere as well.
4385
4386          It should always be the case that we can put these instructions
4387          anywhere in the basic block with performing PRE optimizations.
4388          Check this.  */
4389
4390       if (pre
4391           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4392           && !TEST_BIT (transp[bb], expr->bitmap_index))
4393         abort ();
4394
4395       /* Since different machines initialize their parameter registers
4396          in different orders, assume nothing.  Collect the set of all
4397          parameter registers.  */
4398       CLEAR_HARD_REG_SET (parm_regs);
4399       nparm_regs = 0;
4400       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4401         if (GET_CODE (XEXP (p, 0)) == USE
4402             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4403           {
4404             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4405               abort ();
4406
4407             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4408             nparm_regs++;
4409           }
4410
4411       /* Search backward for the first set of a register in this set.  */
4412       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4413         {
4414           insn = PREV_INSN (insn);
4415           p = single_set (insn);
4416           if (p && GET_CODE (SET_DEST (p)) == REG
4417               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4418               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4419             {
4420               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4421               nparm_regs--;
4422             }
4423         }
4424
4425       /* If we found all the parameter loads, then we want to insert
4426          before the first parameter load.
4427
4428          If we did not find all the parameter loads, then we might have
4429          stopped on the head of the block, which could be a CODE_LABEL.
4430          If we inserted before the CODE_LABEL, then we would be putting
4431          the insn in the wrong basic block.  In that case, put the insn
4432          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4433       while (GET_CODE (insn) == CODE_LABEL
4434              || NOTE_INSN_BASIC_BLOCK_P (insn))
4435         insn = NEXT_INSN (insn);
4436
4437       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4438     }
4439   else
4440     {
4441       new_insn = emit_insn_after (pat, insn);
4442       BLOCK_END (bb) = new_insn;
4443     }
4444
4445   /* Keep block number table up to date.
4446      Note, PAT could be a multiple insn sequence, we have to make
4447      sure that each insn in the sequence is handled.  */
4448   if (GET_CODE (pat) == SEQUENCE)
4449     {
4450       for (i = 0; i < XVECLEN (pat, 0); i++)
4451         {
4452           rtx insn = XVECEXP (pat, 0, i);
4453
4454           set_block_num (insn, bb);
4455           if (INSN_P (insn))
4456             add_label_notes (PATTERN (insn), new_insn);
4457
4458           note_stores (PATTERN (insn), record_set_info, insn);
4459         }
4460     }
4461   else
4462     {
4463       add_label_notes (SET_SRC (pat), new_insn);
4464       set_block_num (new_insn, bb);
4465
4466       /* Keep register set table up to date.  */
4467       record_one_set (regno, new_insn);
4468     }
4469
4470   gcse_create_count++;
4471
4472   if (gcse_file)
4473     {
4474       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4475                bb, INSN_UID (new_insn));
4476       fprintf (gcse_file, "copying expression %d to reg %d\n",
4477                expr->bitmap_index, regno);
4478     }
4479 }
4480
4481 /* Insert partially redundant expressions on edges in the CFG to make
4482    the expressions fully redundant.  */
4483
4484 static int
4485 pre_edge_insert (edge_list, index_map)
4486      struct edge_list *edge_list;
4487      struct expr **index_map;
4488 {
4489   int e, i, j, num_edges, set_size, did_insert = 0;
4490   sbitmap *inserted;
4491
4492   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4493      if it reaches any of the deleted expressions.  */
4494
4495   set_size = pre_insert_map[0]->size;
4496   num_edges = NUM_EDGES (edge_list);
4497   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4498   sbitmap_vector_zero (inserted, num_edges);
4499
4500   for (e = 0; e < num_edges; e++)
4501     {
4502       int indx;
4503       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4504       int bb = pred->index;
4505
4506       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4507         {
4508           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4509
4510           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4511             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4512               {
4513                 struct expr *expr = index_map[j];
4514                 struct occr *occr;
4515
4516                 /* Now look at each deleted occurence of this expression.  */
4517                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4518                   {
4519                     if (! occr->deleted_p)
4520                       continue;
4521
4522                     /* Insert this expression on this edge if if it would
4523                        reach the deleted occurence in BB.  */
4524                     if (!TEST_BIT (inserted[e], j))
4525                       {
4526                         rtx insn;
4527                         edge eg = INDEX_EDGE (edge_list, e);
4528
4529                         /* We can't insert anything on an abnormal and
4530                            critical edge, so we insert the insn at the end of
4531                            the previous block. There are several alternatives
4532                            detailed in Morgans book P277 (sec 10.5) for
4533                            handling this situation.  This one is easiest for
4534                            now.  */
4535
4536                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4537                           insert_insn_end_bb (index_map[j], bb, 0);
4538                         else
4539                           {
4540                             insn = process_insert_insn (index_map[j]);
4541                             insert_insn_on_edge (insn, eg);
4542                           }
4543
4544                         if (gcse_file)
4545                           {
4546                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4547                                      bb,
4548                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4549                             fprintf (gcse_file, "copy expression %d\n",
4550                                      expr->bitmap_index);
4551                           }
4552
4553                         SET_BIT (inserted[e], j);
4554                         did_insert = 1;
4555                         gcse_create_count++;
4556                       }
4557                   }
4558               }
4559         }
4560     }
4561
4562   free (inserted);
4563   return did_insert;
4564 }
4565
4566 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4567
4568 static void
4569 pre_insert_copy_insn (expr, insn)
4570      struct expr *expr;
4571      rtx insn;
4572 {
4573   rtx reg = expr->reaching_reg;
4574   int regno = REGNO (reg);
4575   int indx = expr->bitmap_index;
4576   rtx set = single_set (insn);
4577   rtx new_insn;
4578   int bb = BLOCK_NUM (insn);
4579
4580   if (!set)
4581     abort ();
4582
4583   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4584                               insn);
4585
4586   /* Keep block number table up to date.  */
4587   set_block_num (new_insn, bb);
4588
4589   /* Keep register set table up to date.  */
4590   record_one_set (regno, new_insn);
4591   if (insn == BLOCK_END (bb))
4592     BLOCK_END (bb) = new_insn;
4593
4594   gcse_create_count++;
4595
4596   if (gcse_file)
4597     fprintf (gcse_file,
4598              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4599               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4600               INSN_UID (insn), regno);
4601 }
4602
4603 /* Copy available expressions that reach the redundant expression
4604    to `reaching_reg'.  */
4605
4606 static void
4607 pre_insert_copies ()
4608 {
4609   unsigned int i;
4610   struct expr *expr;
4611   struct occr *occr;
4612   struct occr *avail;
4613
4614   /* For each available expression in the table, copy the result to
4615      `reaching_reg' if the expression reaches a deleted one.
4616
4617      ??? The current algorithm is rather brute force.
4618      Need to do some profiling.  */
4619
4620   for (i = 0; i < expr_hash_table_size; i++)
4621     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4622       {
4623         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4624            we don't want to insert a copy here because the expression may not
4625            really be redundant.  So only insert an insn if the expression was
4626            deleted.  This test also avoids further processing if the
4627            expression wasn't deleted anywhere.  */
4628         if (expr->reaching_reg == NULL)
4629           continue;
4630
4631         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4632           {
4633             if (! occr->deleted_p)
4634               continue;
4635
4636             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4637               {
4638                 rtx insn = avail->insn;
4639
4640                 /* No need to handle this one if handled already.  */
4641                 if (avail->copied_p)
4642                   continue;
4643
4644                 /* Don't handle this one if it's a redundant one.  */
4645                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4646                   continue;
4647
4648                 /* Or if the expression doesn't reach the deleted one.  */
4649                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4650                                                BLOCK_NUM (occr->insn)))
4651                   continue;
4652
4653                 /* Copy the result of avail to reaching_reg.  */
4654                 pre_insert_copy_insn (expr, insn);
4655                 avail->copied_p = 1;
4656               }
4657           }
4658       }
4659 }
4660
4661 /* Delete redundant computations.
4662    Deletion is done by changing the insn to copy the `reaching_reg' of
4663    the expression into the result of the SET.  It is left to later passes
4664    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4665
4666    Returns non-zero if a change is made.  */
4667
4668 static int
4669 pre_delete ()
4670 {
4671   unsigned int i;
4672   int changed;
4673   struct expr *expr;
4674   struct occr *occr;
4675
4676   changed = 0;
4677   for (i = 0; i < expr_hash_table_size; i++)
4678     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4679       {
4680         int indx = expr->bitmap_index;
4681
4682         /* We only need to search antic_occr since we require
4683            ANTLOC != 0.  */
4684
4685         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4686           {
4687             rtx insn = occr->insn;
4688             rtx set;
4689             int bb = BLOCK_NUM (insn);
4690
4691             if (TEST_BIT (pre_delete_map[bb], indx))
4692               {
4693                 set = single_set (insn);
4694                 if (! set)
4695                   abort ();
4696
4697                 /* Create a pseudo-reg to store the result of reaching
4698                    expressions into.  Get the mode for the new pseudo from
4699                    the mode of the original destination pseudo.  */
4700                 if (expr->reaching_reg == NULL)
4701                   expr->reaching_reg
4702                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4703
4704                 /* In theory this should never fail since we're creating
4705                    a reg->reg copy.
4706
4707                    However, on the x86 some of the movXX patterns actually
4708                    contain clobbers of scratch regs.  This may cause the
4709                    insn created by validate_change to not match any pattern
4710                    and thus cause validate_change to fail.   */
4711                 if (validate_change (insn, &SET_SRC (set),
4712                                      expr->reaching_reg, 0))
4713                   {
4714                     occr->deleted_p = 1;
4715                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4716                     changed = 1;
4717                     gcse_subst_count++;
4718                   }
4719
4720                 if (gcse_file)
4721                   {
4722                     fprintf (gcse_file,
4723                              "PRE: redundant insn %d (expression %d) in ",
4724                                INSN_UID (insn), indx);
4725                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4726                              bb, REGNO (expr->reaching_reg));
4727                   }
4728               }
4729           }
4730       }
4731
4732   return changed;
4733 }
4734
4735 /* Perform GCSE optimizations using PRE.
4736    This is called by one_pre_gcse_pass after all the dataflow analysis
4737    has been done.
4738
4739    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4740    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4741    Compiler Design and Implementation.
4742
4743    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4744    thing about the classical approach is that it would try to use an existing
4745    reg.  If the register can't be adequately optimized [i.e. we introduce
4746    reload problems], one could add a pass here to propagate the new register
4747    through the block.
4748
4749    ??? We don't handle single sets in PARALLELs because we're [currently] not
4750    able to copy the rest of the parallel when we insert copies to create full
4751    redundancies from partial redundancies.  However, there's no reason why we
4752    can't handle PARALLELs in the cases where there are no partial
4753    redundancies.  */
4754
4755 static int
4756 pre_gcse ()
4757 {
4758   unsigned int i;
4759   int did_insert, changed;
4760   struct expr **index_map;
4761   struct expr *expr;
4762
4763   /* Compute a mapping from expression number (`bitmap_index') to
4764      hash table entry.  */
4765
4766   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4767   for (i = 0; i < expr_hash_table_size; i++)
4768     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4769       index_map[expr->bitmap_index] = expr;
4770
4771   /* Reset bitmap used to track which insns are redundant.  */
4772   pre_redundant_insns = sbitmap_alloc (max_cuid);
4773   sbitmap_zero (pre_redundant_insns);
4774
4775   /* Delete the redundant insns first so that
4776      - we know what register to use for the new insns and for the other
4777        ones with reaching expressions
4778      - we know which insns are redundant when we go to create copies  */
4779
4780   changed = pre_delete ();
4781
4782   did_insert = pre_edge_insert (edge_list, index_map);
4783
4784   /* In other places with reaching expressions, copy the expression to the
4785      specially allocated pseudo-reg that reaches the redundant expr.  */
4786   pre_insert_copies ();
4787   if (did_insert)
4788     {
4789       commit_edge_insertions ();
4790       changed = 1;
4791     }
4792
4793   free (index_map);
4794   free (pre_redundant_insns);
4795   return changed;
4796 }
4797
4798 /* Top level routine to perform one PRE GCSE pass.
4799
4800    Return non-zero if a change was made.  */
4801
4802 static int
4803 one_pre_gcse_pass (pass)
4804      int pass;
4805 {
4806   int changed = 0;
4807
4808   gcse_subst_count = 0;
4809   gcse_create_count = 0;
4810
4811   alloc_expr_hash_table (max_cuid);
4812   add_noreturn_fake_exit_edges ();
4813   compute_expr_hash_table ();
4814   if (gcse_file)
4815     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4816                      expr_hash_table_size, n_exprs);
4817
4818   if (n_exprs > 0)
4819     {
4820       alloc_pre_mem (n_basic_blocks, n_exprs);
4821       compute_pre_data ();
4822       changed |= pre_gcse ();
4823       free_edge_list (edge_list);
4824       free_pre_mem ();
4825     }
4826
4827   remove_fake_edges ();
4828   free_expr_hash_table ();
4829
4830   if (gcse_file)
4831     {
4832       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4833                current_function_name, pass, bytes_used);
4834       fprintf (gcse_file, "%d substs, %d insns created\n",
4835                gcse_subst_count, gcse_create_count);
4836     }
4837
4838   return changed;
4839 }
4840 \f
4841 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4842    If notes are added to an insn which references a CODE_LABEL, the
4843    LABEL_NUSES count is incremented.  We have to add REG_LABEL notes,
4844    because the following loop optimization pass requires them.  */
4845
4846 /* ??? This is very similar to the loop.c add_label_notes function.  We
4847    could probably share code here.  */
4848
4849 /* ??? If there was a jump optimization pass after gcse and before loop,
4850    then we would not need to do this here, because jump would add the
4851    necessary REG_LABEL notes.  */
4852
4853 static void
4854 add_label_notes (x, insn)
4855      rtx x;
4856      rtx insn;
4857 {
4858   enum rtx_code code = GET_CODE (x);
4859   int i, j;
4860   const char *fmt;
4861
4862   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4863     {
4864       /* This code used to ignore labels that referred to dispatch tables to
4865          avoid flow generating (slighly) worse code.
4866
4867          We no longer ignore such label references (see LABEL_REF handling in
4868          mark_jump_label for additional information).  */
4869
4870       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4871                                             REG_NOTES (insn));
4872       if (LABEL_P (XEXP (x, 0)))
4873         LABEL_NUSES (XEXP (x, 0))++;
4874       return;
4875     }
4876
4877   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4878     {
4879       if (fmt[i] == 'e')
4880         add_label_notes (XEXP (x, i), insn);
4881       else if (fmt[i] == 'E')
4882         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4883           add_label_notes (XVECEXP (x, i, j), insn);
4884     }
4885 }
4886
4887 /* Compute transparent outgoing information for each block.
4888
4889    An expression is transparent to an edge unless it is killed by
4890    the edge itself.  This can only happen with abnormal control flow,
4891    when the edge is traversed through a call.  This happens with
4892    non-local labels and exceptions.
4893
4894    This would not be necessary if we split the edge.  While this is
4895    normally impossible for abnormal critical edges, with some effort
4896    it should be possible with exception handling, since we still have
4897    control over which handler should be invoked.  But due to increased
4898    EH table sizes, this may not be worthwhile.  */
4899
4900 static void
4901 compute_transpout ()
4902 {
4903   int bb;
4904   unsigned int i;
4905   struct expr *expr;
4906
4907   sbitmap_vector_ones (transpout, n_basic_blocks);
4908
4909   for (bb = 0; bb < n_basic_blocks; ++bb)
4910     {
4911       /* Note that flow inserted a nop a the end of basic blocks that
4912          end in call instructions for reasons other than abnormal
4913          control flow.  */
4914       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4915         continue;
4916
4917       for (i = 0; i < expr_hash_table_size; i++)
4918         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4919           if (GET_CODE (expr->expr) == MEM)
4920             {
4921               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4922                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4923                 continue;
4924
4925               /* ??? Optimally, we would use interprocedural alias
4926                  analysis to determine if this mem is actually killed
4927                  by this call.  */
4928               RESET_BIT (transpout[bb], expr->bitmap_index);
4929             }
4930     }
4931 }
4932
4933 /* Removal of useless null pointer checks */
4934
4935 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4936    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4937    `null_pointer_info *'.
4938
4939    We ignore hard registers.  */
4940
4941 static void
4942 invalidate_nonnull_info (x, setter, data)
4943      rtx x;
4944      rtx setter ATTRIBUTE_UNUSED;
4945      void *data;
4946 {
4947   unsigned int regno;
4948   struct null_pointer_info *npi = (struct null_pointer_info *) data;
4949
4950   while (GET_CODE (x) == SUBREG)
4951     x = SUBREG_REG (x);
4952
4953   /* Ignore anything that is not a register or is a hard register.  */
4954   if (GET_CODE (x) != REG
4955       || REGNO (x) < npi->min_reg
4956       || REGNO (x) >= npi->max_reg)
4957     return;
4958
4959   regno = REGNO (x) - npi->min_reg;
4960
4961   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4962   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4963 }
4964
4965 /* Do null-pointer check elimination for the registers indicated in
4966    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4967    they are not our responsibility to free.  */
4968
4969 static void
4970 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4971      unsigned int *block_reg;
4972      sbitmap *nonnull_avin;
4973      sbitmap *nonnull_avout;
4974      struct null_pointer_info *npi;
4975 {
4976   int bb;
4977   int current_block;
4978   sbitmap *nonnull_local = npi->nonnull_local;
4979   sbitmap *nonnull_killed = npi->nonnull_killed;
4980
4981   /* Compute local properties, nonnull and killed.  A register will have
4982      the nonnull property if at the end of the current block its value is
4983      known to be nonnull.  The killed property indicates that somewhere in
4984      the block any information we had about the register is killed.
4985
4986      Note that a register can have both properties in a single block.  That
4987      indicates that it's killed, then later in the block a new value is
4988      computed.  */
4989   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4990   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4991
4992   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4993     {
4994       rtx insn, stop_insn;
4995
4996       /* Set the current block for invalidate_nonnull_info.  */
4997       npi->current_block = current_block;
4998
4999       /* Scan each insn in the basic block looking for memory references and
5000          register sets.  */
5001       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5002       for (insn = BLOCK_HEAD (current_block);
5003            insn != stop_insn;
5004            insn = NEXT_INSN (insn))
5005         {
5006           rtx set;
5007           rtx reg;
5008
5009           /* Ignore anything that is not a normal insn.  */
5010           if (! INSN_P (insn))
5011             continue;
5012
5013           /* Basically ignore anything that is not a simple SET.  We do have
5014              to make sure to invalidate nonnull_local and set nonnull_killed
5015              for such insns though.  */
5016           set = single_set (insn);
5017           if (!set)
5018             {
5019               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5020               continue;
5021             }
5022
5023           /* See if we've got a useable memory load.  We handle it first
5024              in case it uses its address register as a dest (which kills
5025              the nonnull property).  */
5026           if (GET_CODE (SET_SRC (set)) == MEM
5027               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5028               && REGNO (reg) >= npi->min_reg
5029               && REGNO (reg) < npi->max_reg)
5030             SET_BIT (nonnull_local[current_block],
5031                      REGNO (reg) - npi->min_reg);
5032
5033           /* Now invalidate stuff clobbered by this insn.  */
5034           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5035
5036           /* And handle stores, we do these last since any sets in INSN can
5037              not kill the nonnull property if it is derived from a MEM
5038              appearing in a SET_DEST.  */
5039           if (GET_CODE (SET_DEST (set)) == MEM
5040               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5041               && REGNO (reg) >= npi->min_reg
5042               && REGNO (reg) < npi->max_reg)
5043             SET_BIT (nonnull_local[current_block],
5044                      REGNO (reg) - npi->min_reg);
5045         }
5046     }
5047
5048   /* Now compute global properties based on the local properties.   This
5049      is a classic global availablity algorithm.  */
5050   compute_available (nonnull_local, nonnull_killed,
5051                      nonnull_avout, nonnull_avin);
5052
5053   /* Now look at each bb and see if it ends with a compare of a value
5054      against zero.  */
5055   for (bb = 0; bb < n_basic_blocks; bb++)
5056     {
5057       rtx last_insn = BLOCK_END (bb);
5058       rtx condition, earliest;
5059       int compare_and_branch;
5060
5061       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5062          since BLOCK_REG[BB] is zero if this block did not end with a
5063          comparison against zero, this condition works.  */
5064       if (block_reg[bb] < npi->min_reg
5065           || block_reg[bb] >= npi->max_reg)
5066         continue;
5067
5068       /* LAST_INSN is a conditional jump.  Get its condition.  */
5069       condition = get_condition (last_insn, &earliest);
5070
5071       /* If we can't determine the condition then skip.  */
5072       if (! condition)
5073         continue;
5074
5075       /* Is the register known to have a nonzero value?  */
5076       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5077         continue;
5078
5079       /* Try to compute whether the compare/branch at the loop end is one or
5080          two instructions.  */
5081       if (earliest == last_insn)
5082         compare_and_branch = 1;
5083       else if (earliest == prev_nonnote_insn (last_insn))
5084         compare_and_branch = 2;
5085       else
5086         continue;
5087
5088       /* We know the register in this comparison is nonnull at exit from
5089          this block.  We can optimize this comparison.  */
5090       if (GET_CODE (condition) == NE)
5091         {
5092           rtx new_jump;
5093
5094           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5095                                             last_insn);
5096           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5097           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5098           emit_barrier_after (new_jump);
5099         }
5100       delete_insn (last_insn);
5101       if (compare_and_branch == 2)
5102         delete_insn (earliest);
5103
5104       /* Don't check this block again.  (Note that BLOCK_END is
5105          invalid here; we deleted the last instruction in the
5106          block.)  */
5107       block_reg[bb] = 0;
5108     }
5109 }
5110
5111 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5112    at compile time.
5113
5114    This is conceptually similar to global constant/copy propagation and
5115    classic global CSE (it even uses the same dataflow equations as cprop).
5116
5117    If a register is used as memory address with the form (mem (reg)), then we
5118    know that REG can not be zero at that point in the program.  Any instruction
5119    which sets REG "kills" this property.
5120
5121    So, if every path leading to a conditional branch has an available memory
5122    reference of that form, then we know the register can not have the value
5123    zero at the conditional branch.
5124
5125    So we merely need to compute the local properies and propagate that data
5126    around the cfg, then optimize where possible.
5127
5128    We run this pass two times.  Once before CSE, then again after CSE.  This
5129    has proven to be the most profitable approach.  It is rare for new
5130    optimization opportunities of this nature to appear after the first CSE
5131    pass.
5132
5133    This could probably be integrated with global cprop with a little work.  */
5134
5135 void
5136 delete_null_pointer_checks (f)
5137      rtx f ATTRIBUTE_UNUSED;
5138 {
5139   sbitmap *nonnull_avin, *nonnull_avout;
5140   unsigned int *block_reg;
5141   int bb;
5142   int reg;
5143   int regs_per_pass;
5144   int max_reg;
5145   struct null_pointer_info npi;
5146
5147   /* If we have only a single block, then there's nothing to do.  */
5148   if (n_basic_blocks <= 1)
5149     return;
5150
5151   /* Trying to perform global optimizations on flow graphs which have
5152      a high connectivity will take a long time and is unlikely to be
5153      particularly useful.
5154
5155      In normal circumstances a cfg should have about twice has many edges
5156      as blocks.  But we do not want to punish small functions which have
5157      a couple switch statements.  So we require a relatively large number
5158      of basic blocks and the ratio of edges to blocks to be high.  */
5159   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5160     return;
5161
5162   /* We need four bitmaps, each with a bit for each register in each
5163      basic block.  */
5164   max_reg = max_reg_num ();
5165   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5166
5167   /* Allocate bitmaps to hold local and global properties.  */
5168   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5169   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5170   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5171   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5172
5173   /* Go through the basic blocks, seeing whether or not each block
5174      ends with a conditional branch whose condition is a comparison
5175      against zero.  Record the register compared in BLOCK_REG.  */
5176   block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5177   for (bb = 0; bb < n_basic_blocks; bb++)
5178     {
5179       rtx last_insn = BLOCK_END (bb);
5180       rtx condition, earliest, reg;
5181
5182       /* We only want conditional branches.  */
5183       if (GET_CODE (last_insn) != JUMP_INSN
5184           || !any_condjump_p (last_insn)
5185           || !onlyjump_p (last_insn))
5186         continue;
5187
5188       /* LAST_INSN is a conditional jump.  Get its condition.  */
5189       condition = get_condition (last_insn, &earliest);
5190
5191       /* If we were unable to get the condition, or it is not a equality
5192          comparison against zero then there's nothing we can do.  */
5193       if (!condition
5194           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5195           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5196           || (XEXP (condition, 1)
5197               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5198         continue;
5199
5200       /* We must be checking a register against zero.  */
5201       reg = XEXP (condition, 0);
5202       if (GET_CODE (reg) != REG)
5203         continue;
5204
5205       block_reg[bb] = REGNO (reg);
5206     }
5207
5208   /* Go through the algorithm for each block of registers.  */
5209   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5210     {
5211       npi.min_reg = reg;
5212       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5213       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5214                                     nonnull_avout, &npi);
5215     }
5216
5217   /* Free the table of registers compared at the end of every block.  */
5218   free (block_reg);
5219
5220   /* Free bitmaps.  */
5221   free (npi.nonnull_local);
5222   free (npi.nonnull_killed);
5223   free (nonnull_avin);
5224   free (nonnull_avout);
5225 }
5226
5227 /* Code Hoisting variables and subroutines.  */
5228
5229 /* Very busy expressions.  */
5230 static sbitmap *hoist_vbein;
5231 static sbitmap *hoist_vbeout;
5232
5233 /* Hoistable expressions.  */
5234 static sbitmap *hoist_exprs;
5235
5236 /* Dominator bitmaps.  */
5237 static sbitmap *dominators;
5238
5239 /* ??? We could compute post dominators and run this algorithm in
5240    reverse to to perform tail merging, doing so would probably be
5241    more effective than the tail merging code in jump.c.
5242
5243    It's unclear if tail merging could be run in parallel with
5244    code hoisting.  It would be nice.  */
5245
5246 /* Allocate vars used for code hoisting analysis.  */
5247
5248 static void
5249 alloc_code_hoist_mem (n_blocks, n_exprs)
5250      int n_blocks, n_exprs;
5251 {
5252   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5253   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5254   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5255
5256   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5257   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5258   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5259   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5260
5261   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5262 }
5263
5264 /* Free vars used for code hoisting analysis.  */
5265
5266 static void
5267 free_code_hoist_mem ()
5268 {
5269   free (antloc);
5270   free (transp);
5271   free (comp);
5272
5273   free (hoist_vbein);
5274   free (hoist_vbeout);
5275   free (hoist_exprs);
5276   free (transpout);
5277
5278   free (dominators);
5279 }
5280
5281 /* Compute the very busy expressions at entry/exit from each block.
5282
5283    An expression is very busy if all paths from a given point
5284    compute the expression.  */
5285
5286 static void
5287 compute_code_hoist_vbeinout ()
5288 {
5289   int bb, changed, passes;
5290
5291   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5292   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5293
5294   passes = 0;
5295   changed = 1;
5296
5297   while (changed)
5298     {
5299       changed = 0;
5300
5301       /* We scan the blocks in the reverse order to speed up
5302          the convergence.  */
5303       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5304         {
5305           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5306                                            hoist_vbeout[bb], transp[bb]);
5307           if (bb != n_basic_blocks - 1)
5308             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5309         }
5310
5311       passes++;
5312     }
5313
5314   if (gcse_file)
5315     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5316 }
5317
5318 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5319
5320 static void
5321 compute_code_hoist_data ()
5322 {
5323   compute_local_properties (transp, comp, antloc, 0);
5324   compute_transpout ();
5325   compute_code_hoist_vbeinout ();
5326   calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
5327   if (gcse_file)
5328     fprintf (gcse_file, "\n");
5329 }
5330
5331 /* Determine if the expression identified by EXPR_INDEX would
5332    reach BB unimpared if it was placed at the end of EXPR_BB.
5333
5334    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5335    to me that the expression must either be computed or transparent in
5336    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5337    would allow the expression to be hoisted out of loops, even if
5338    the expression wasn't a loop invariant.
5339
5340    Contrast this to reachability for PRE where an expression is
5341    considered reachable if *any* path reaches instead of *all*
5342    paths.  */
5343
5344 static int
5345 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5346      int expr_bb;
5347      int expr_index;
5348      int bb;
5349      char *visited;
5350 {
5351   edge pred;
5352   int visited_allocated_locally = 0;
5353
5354
5355   if (visited == NULL)
5356     {
5357        visited_allocated_locally = 1;
5358        visited = xcalloc (n_basic_blocks, 1);
5359     }
5360
5361   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5362     {
5363       int pred_bb = pred->src->index;
5364
5365       if (pred->src == ENTRY_BLOCK_PTR)
5366         break;
5367       else if (visited[pred_bb])
5368         continue;
5369
5370       /* Does this predecessor generate this expression?  */
5371       else if (TEST_BIT (comp[pred_bb], expr_index))
5372         break;
5373       else if (! TEST_BIT (transp[pred_bb], expr_index))
5374         break;
5375
5376       /* Not killed.  */
5377       else
5378         {
5379           visited[pred_bb] = 1;
5380           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5381                                            pred_bb, visited))
5382             break;
5383         }
5384     }
5385   if (visited_allocated_locally)
5386     free (visited);
5387
5388   return (pred == NULL);
5389 }
5390 \f
5391 /* Actually perform code hoisting.  */
5392
5393 static void
5394 hoist_code ()
5395 {
5396   int bb, dominated;
5397   unsigned int i;
5398   struct expr **index_map;
5399   struct expr *expr;
5400
5401   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5402
5403   /* Compute a mapping from expression number (`bitmap_index') to
5404      hash table entry.  */
5405
5406   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5407   for (i = 0; i < expr_hash_table_size; i++)
5408     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5409       index_map[expr->bitmap_index] = expr;
5410
5411   /* Walk over each basic block looking for potentially hoistable
5412      expressions, nothing gets hoisted from the entry block.  */
5413   for (bb = 0; bb < n_basic_blocks; bb++)
5414     {
5415       int found = 0;
5416       int insn_inserted_p;
5417
5418       /* Examine each expression that is very busy at the exit of this
5419          block.  These are the potentially hoistable expressions.  */
5420       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5421         {
5422           int hoistable = 0;
5423
5424           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5425             {
5426               /* We've found a potentially hoistable expression, now
5427                  we look at every block BB dominates to see if it
5428                  computes the expression.  */
5429               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5430                 {
5431                   /* Ignore self dominance.  */
5432                   if (bb == dominated
5433                       || ! TEST_BIT (dominators[dominated], bb))
5434                     continue;
5435
5436                   /* We've found a dominated block, now see if it computes
5437                      the busy expression and whether or not moving that
5438                      expression to the "beginning" of that block is safe.  */
5439                   if (!TEST_BIT (antloc[dominated], i))
5440                     continue;
5441
5442                   /* Note if the expression would reach the dominated block
5443                      unimpared if it was placed at the end of BB.
5444
5445                      Keep track of how many times this expression is hoistable
5446                      from a dominated block into BB.  */
5447                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5448                     hoistable++;
5449                 }
5450
5451               /* If we found more than one hoistable occurence of this
5452                  expression, then note it in the bitmap of expressions to
5453                  hoist.  It makes no sense to hoist things which are computed
5454                  in only one BB, and doing so tends to pessimize register
5455                  allocation.  One could increase this value to try harder
5456                  to avoid any possible code expansion due to register
5457                  allocation issues; however experiments have shown that
5458                  the vast majority of hoistable expressions are only movable
5459                  from two successors, so raising this threshhold is likely
5460                  to nullify any benefit we get from code hoisting.  */
5461               if (hoistable > 1)
5462                 {
5463                   SET_BIT (hoist_exprs[bb], i);
5464                   found = 1;
5465                 }
5466             }
5467         }
5468
5469       /* If we found nothing to hoist, then quit now.  */
5470       if (! found)
5471         continue;
5472
5473       /* Loop over all the hoistable expressions.  */
5474       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5475         {
5476           /* We want to insert the expression into BB only once, so
5477              note when we've inserted it.  */
5478           insn_inserted_p = 0;
5479
5480           /* These tests should be the same as the tests above.  */
5481           if (TEST_BIT (hoist_vbeout[bb], i))
5482             {
5483               /* We've found a potentially hoistable expression, now
5484                  we look at every block BB dominates to see if it
5485                  computes the expression.  */
5486               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5487                 {
5488                   /* Ignore self dominance.  */
5489                   if (bb == dominated
5490                       || ! TEST_BIT (dominators[dominated], bb))
5491                     continue;
5492
5493                   /* We've found a dominated block, now see if it computes
5494                      the busy expression and whether or not moving that
5495                      expression to the "beginning" of that block is safe.  */
5496                   if (!TEST_BIT (antloc[dominated], i))
5497                     continue;
5498
5499                   /* The expression is computed in the dominated block and
5500                      it would be safe to compute it at the start of the
5501                      dominated block.  Now we have to determine if the
5502                      expresion would reach the dominated block if it was
5503                      placed at the end of BB.  */
5504                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5505                     {
5506                       struct expr *expr = index_map[i];
5507                       struct occr *occr = expr->antic_occr;
5508                       rtx insn;
5509                       rtx set;
5510
5511                       /* Find the right occurence of this expression.  */
5512                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5513                         occr = occr->next;
5514
5515                       /* Should never happen.  */
5516                       if (!occr)
5517                         abort ();
5518
5519                       insn = occr->insn;
5520
5521                       set = single_set (insn);
5522                       if (! set)
5523                         abort ();
5524
5525                       /* Create a pseudo-reg to store the result of reaching
5526                          expressions into.  Get the mode for the new pseudo
5527                          from the mode of the original destination pseudo.  */
5528                       if (expr->reaching_reg == NULL)
5529                         expr->reaching_reg
5530                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5531
5532                       /* In theory this should never fail since we're creating
5533                          a reg->reg copy.
5534
5535                          However, on the x86 some of the movXX patterns
5536                          actually contain clobbers of scratch regs.  This may
5537                          cause the insn created by validate_change to not
5538                          match any pattern and thus cause validate_change to
5539                          fail.  */
5540                       if (validate_change (insn, &SET_SRC (set),
5541                                            expr->reaching_reg, 0))
5542                         {
5543                           occr->deleted_p = 1;
5544                           if (!insn_inserted_p)
5545                             {
5546                               insert_insn_end_bb (index_map[i], bb, 0);
5547                               insn_inserted_p = 1;
5548                             }
5549                         }
5550                     }
5551                 }
5552             }
5553         }
5554     }
5555
5556     free (index_map);
5557 }
5558
5559 /* Top level routine to perform one code hoisting (aka unification) pass
5560
5561    Return non-zero if a change was made.  */
5562
5563 static int
5564 one_code_hoisting_pass ()
5565 {
5566   int changed = 0;
5567
5568   alloc_expr_hash_table (max_cuid);
5569   compute_expr_hash_table ();
5570   if (gcse_file)
5571     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5572                      expr_hash_table_size, n_exprs);
5573
5574   if (n_exprs > 0)
5575     {
5576       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5577       compute_code_hoist_data ();
5578       hoist_code ();
5579       free_code_hoist_mem ();
5580     }
5581
5582   free_expr_hash_table ();
5583
5584   return changed;
5585 }