/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - dead store elimination
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass.  */
/* References searched while implementing this.

   Compilers Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The Power of Assignment Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global Code Motion / Global Value Numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.  */
#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "params.h"

#include "obstack.h"
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free

/* Maximum number of passes to perform.  */
#define MAX_PASSES 1

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1
/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  Macro MAX_PASSES
   can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   A fair bit of simplicity is gained by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle, things can
   be rearranged.

   Help stamp out big monolithic functions!  */
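
/* Editor's illustration (not from the original sources): the kind of
   transformation the PRE steps above perform, sketched at the source level.
   Given

	if (cond)
	  x = a + b;
	y = a + b;

   the computation of `a + b' feeding `y' is partially redundant: it is
   available on the path through the THEN arm but not on the path around it.
   Deletion (step 3) rewrites the redundant insn to read a new pseudo `t';
   insertion (steps 4 and 5) makes `t' hold the value on every incoming path:

	if (cond)
	  {
	    x = a + b;
	    t = x;
	  }
	else
	  t = a + b;
	y = t;

   so `a + b' is now computed exactly once on each path to `y'.  */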
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;

struct reg_use {rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static unsigned int set_hash_table_size;

/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;

/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#ifdef ENABLE_CHECKING
#define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
#else
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
#endif

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])
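
/* Editor's note (illustrative, not from the original sources): because
   CUIDs are dense and increase monotonically over real insns, questions
   about insn order reduce to integer comparisons, e.g.

	if (INSN_CUID (set_insn) < INSN_CUID (use_insn))
	  ... the set occurs before the use in insn stream order ...

   which is how oprs_unchanged_p below uses reg_first_set/reg_last_set.  */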
/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;

/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.

   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
   advantage of recording things this way is that `reg_set_table' is fairly
   sparse with respect to pseudo regs but for hard regs could be fairly dense
   [relatively speaking].  And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */

typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;

static reg_set **reg_set_table;

/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
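
#if 0
/* Editor's sketch (hypothetical helper, not part of the pass): how a chain
   in `reg_set_table' is walked.  Each pseudo's entry is a NULL-terminated
   list of the insns that set it, most recently recorded first, which is
   what makes compute_transp cheap for sparsely-set pseudos.  */

static int
example_count_sets (regno)
     int regno;
{
  reg_set *r;
  int n = 0;

  for (r = reg_set_table[regno]; r != NULL; r = r->next)
    /* r->insn is one insn that sets pseudo REGNO.  */
    n++;

  return n;
}
#endif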
/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* For each block, non-zero if memory is set in that block.
   This is computed during hash table computation and is used by
   expr_killed_p and compute_transp.
   ??? Handling of memory is very simple, we don't make any attempt
   to optimize things (later).
   ??? This can be computed by compute_sets since the information
   doesn't change.  */
static char *mem_set_in_block;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;

/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.
   rd_kill[block_num][cuid_num]
   ae_kill[block_num][expr_num]  */

/* For reaching defs */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* for available exprs */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;

/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info
{
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  unsigned int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  unsigned int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
static void compute_can_copy PARAMS ((void));
static char *gmalloc PARAMS ((unsigned int));
static char *grealloc PARAMS ((char *, unsigned int));
static char *gcse_alloc PARAMS ((unsigned long));
static void alloc_gcse_mem PARAMS ((rtx));
static void free_gcse_mem PARAMS ((void));
static void alloc_reg_set_mem PARAMS ((int));
static void free_reg_set_mem PARAMS ((void));
static int get_bitmap_width PARAMS ((int, int, int));
static void record_one_set PARAMS ((int, rtx));
static void record_set_info PARAMS ((rtx, rtx, void *));
static void compute_sets PARAMS ((rtx));
static void hash_scan_insn PARAMS ((rtx, int, int));
static void hash_scan_set PARAMS ((rtx, rtx, int));
static void hash_scan_clobber PARAMS ((rtx, rtx));
static void hash_scan_call PARAMS ((rtx, rtx));
static int want_to_gcse_p PARAMS ((rtx));
static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p PARAMS ((rtx, rtx));
static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
					  int, int));
static void insert_set_in_table PARAMS ((rtx, rtx));
static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_string_1 PARAMS ((const char *));
static unsigned int hash_set PARAMS ((int, int));
static int expr_equiv_p PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((unsigned int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
				     int, int));
static struct expr *lookup_expr PARAMS ((rtx));
static struct expr *lookup_set PARAMS ((unsigned int, rtx));
static struct expr *next_set PARAMS ((unsigned int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p PARAMS ((rtx, rtx));
static void mark_call PARAMS ((rtx));
static void mark_set PARAMS ((rtx, rtx));
static void mark_clobber PARAMS ((rtx, rtx));
static void mark_oprs_set PARAMS ((rtx));
static void alloc_cprop_mem PARAMS ((int, int));
static void free_cprop_mem PARAMS ((void));
static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
					      int));
static void compute_cprop_data PARAMS ((void));
static void find_used_regs PARAMS ((rtx));
static int try_replace_reg PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump PARAMS ((rtx, rtx, struct reg_use *, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((rtx, struct reg_use *, rtx));
#endif
static int cprop_insn PARAMS ((rtx, int));
static int cprop PARAMS ((int));
static int one_cprop_pass PARAMS ((int, int));
static void alloc_pre_mem PARAMS ((int, int));
static void free_pre_mem PARAMS ((void));
static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
static void insert_insn_end_bb PARAMS ((struct expr *, int, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies PARAMS ((void));
static int pre_delete PARAMS ((void));
static int pre_gcse PARAMS ((void));
static int one_pre_gcse_pass PARAMS ((int));
static void add_label_notes PARAMS ((rtx, rtx));
static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem PARAMS ((void));
static void compute_code_hoist_vbeinout PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
static void hoist_code PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));
static void alloc_rd_mem PARAMS ((int, int));
static void free_rd_mem PARAMS ((void));
static void handle_rd_kill_set PARAMS ((rtx, int, int));
static void compute_kill_rd PARAMS ((void));
static void compute_rd PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem PARAMS ((void));
static void compute_ae_gen PARAMS ((void));
static int expr_killed_p PARAMS ((rtx, int));
static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
					int, int));
static rtx computing_insn PARAMS ((struct expr *, rtx));
static int def_reaches_here_p PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr PARAMS ((rtx, struct expr *));
static int classic_gcse PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((varray_type *,
						  unsigned int *,
						  sbitmap *, sbitmap *,
						  struct null_pointer_info *));
static rtx process_insert_insn PARAMS ((struct expr *));
static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
					     int, int, char *));
static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
						 int, char *));
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (file)
    dump_flow_info (file);

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return 0;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
		 n_basic_blocks, n_edges / n_basic_blocks);
      return 0;
    }

  /* If allocating memory for the cprop bitmap would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_gcse_regno)
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d basic blocks and %d registers",
		 n_basic_blocks, max_gcse_regno);

      return 0;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_PASSES)
    {
      changed = 0;
      if (file)
	fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
	 and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
	 during this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
	changed |= one_classic_gcse_pass (pass + 1);
      else
	{
	  changed |= one_pre_gcse_pass (pass + 1);
	  free_reg_set_mem ();
	  alloc_reg_set_mem (max_reg_num ());
	  compute_sets (f);
	  run_jump_opt_after_gcse = 1;
	}

      if (max_pass_bytes < bytes_used)
	max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
	 not re-use the existing allocated memory because the tables
	 will not have info for the insns or registers created by
	 partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
	 for code size -- it rarely makes programs faster, and can make
	 them bigger if we did partial redundancy elimination (when optimizing
	 for space, we use a classic gcse algorithm instead of partial
	 redundancy algorithms).  */
      if (optimize_size)
	{
	  max_gcse_regno = max_reg_num ();
	  alloc_gcse_mem (f);
	  changed |= one_code_hoisting_pass ();
	  free_gcse_mem ();

	  if (max_pass_bytes < bytes_used)
	    max_pass_bytes = bytes_used;
	}

      if (file)
	{
	  fprintf (file, "\n");
	  fflush (file);
	}

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);

  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
	       current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
	       pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  obstack_free (&gcse_obstack, NULL_PTR);
  free_reg_set_mem ();
  return run_jump_opt_after_gcse;
}
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy_p, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy_p[i] = 0;
#else
	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
	  can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();
}
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}
/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  memset ((char *) uid_cuid, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	uid_cuid[INSN_UID (insn)] = i++;
      else
	uid_cuid[INSN_UID (insn)] = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  memset ((char *) cuid_insn, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      CUID_INSN (i++) = insn;

  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
						       max_gcse_regno);
  mem_set_in_block = (char *) gmalloc (n_basic_blocks);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  free (reg_set_in_block);
  free (mem_set_in_block);
}
/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values across the
   various blocks are combined (by following flow graph edges) to arrive at
   global values.  Conceptually, each set of equations is independent.  We
   may therefore solve all the equations in parallel, solve them one at a
   time, or pick any intermediate approach.

   When you're going to need N two-dimensional bitmaps, each X (say, the
   number of blocks) by Y (say, the number of expressions), call this
   function.  It's not important what X and Y represent; only that Y
   correspond to the things that can be done in parallel.  This function will
   return an appropriate chunking factor C; you should solve C sets of
   equations in parallel.  By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
			     / column_size);
}
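
#if 0
/* Editor's sketch (hypothetical caller, not part of the pass): the intended
   use of get_bitmap_width.  To solve equations over X blocks and Y
   expressions with N bitmaps per block, process the expressions in chunks
   of C columns so only about N * X * C bits of bitmap are live at once.  */

static void
example_solve_in_chunks (n, x, y)
     int n, x, y;
{
  int c = get_bitmap_width (n, x, y);
  int base;

  for (base = 0; base < y; base += c)
    {
      /* Allocate n sbitmap vectors, each x blocks by at most c bits wide;
	 solve equations [base, base + c) in parallel; record the results;
	 free the vectors before the next chunk.  */
    }
}
#endif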
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   SETP controls which hash table to look at.  If zero, this routine looks at
   the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */

static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
	sbitmap_vector_zero (transp, n_basic_blocks);
      else
	sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp, setp);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to non-zero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to non-zero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  unsigned int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  memset ((char *) reg_set_table, 0, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL_PTR);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;

      reg_set_table
	= (struct reg_set **) grealloc ((char *) reg_set_table,
					new_size * sizeof (struct reg_set *));
      memset ((char *) (reg_set_table + reg_set_table_size), 0,
	      (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
						   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = reg_set_table[regno];
  reg_set_table[regno] = new_reg_info;
}

/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.  See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

  for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      note_stores (PATTERN (insn), record_set_info, insn);
}
/* Hash table support.  */

/* For each register, the cuid of the first/last insn in the block to set it,
   or -1 if not set.  */
#define NEVER_SET -1
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or -1 if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.

   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;

/* Perform a quick check whether X, the source of a set, is something
   we want to consider for GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  return 1;
}
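
/* Editor's note (illustrative, not from the original sources): under the
   test above, a set such as

	(set (reg 100) (plus (reg 101) (const_int 4)))

   is a GCSE candidate, while plain constants, registers, and copies like

	(set (reg 100) (reg 101))

   are rejected; those are cheap to recompute and are handled by the
   constant/copy propagation pass instead.  */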
/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
	return (reg_last_set[REGNO (x)] == NEVER_SET
		|| reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
	return (reg_first_set[REGNO (x)] == NEVER_SET
		|| reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (avail_p && mem_last_set != NEVER_SET
	  && mem_last_set >= INSN_CUID (insn))
	return 0;
      else if (! avail_p && mem_first_set != NEVER_SET
	       && mem_first_set < INSN_CUID (insn))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}

/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}

/* Hash a string.  Just add its bytes up.  */

static inline unsigned
hash_string_1 (ps)
     const char *ps;
{
  unsigned hash = 0;
  const unsigned char *p = (const unsigned char *) ps;

  if (p)
    while (*p)
      hash += *p++;

  return hash;
}

/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;

  /* Used to turn recursion into iteration.  We can't rely on GCC's
     tail-recursion elimination since we need to keep accumulating values
     in HASH.  */

  if (x == 0)
    return hash;

 repeat:
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      hash += ((unsigned int) REG << 7) + REGNO (x);
      return hash;

    case CONST_INT:
      hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
	       + (unsigned int) INTVAL (x));
      return hash;

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
	 the integers representing the constant.  */
      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
	for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
	  hash += (unsigned int) XWINT (x, i);
      else
	hash += ((unsigned int) CONST_DOUBLE_LOW (x)
		 + (unsigned int) CONST_DOUBLE_HIGH (x));
      return hash;

      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
	 differences and differences between each stage's debugging dumps.  */
      hash += (((unsigned int) LABEL_REF << 7)
	       + CODE_LABEL_NUMBER (XEXP (x, 0)));
      return hash;

    case SYMBOL_REF:
      {
	/* Don't hash on the symbol's address to avoid bootstrap differences.
	   Different hash values may cause expressions to be recorded in
	   different orders and thus different registers to be used in the
	   final assembler.  This also avoids differences in the dump files
	   between various stages.  */
	unsigned int h = 0;
	const unsigned char *p = (const unsigned char *) XSTR (x, 0);

	while (*p)
	  h += (h << 7) + *p++; /* ??? revisit */

	hash += ((unsigned int) SYMBOL_REF << 7) + h;
	return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}

      hash += (unsigned int) MEM;
      hash += MEM_ALIAS_SET (x);
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}
      else
	{
	  /* We don't want to take the filename and line into account.  */
	  hash += (unsigned) code + (unsigned) GET_MODE (x)
	    + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
	    + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
	    + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);

	  if (ASM_OPERANDS_INPUT_LENGTH (x))
	    {
	      for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
		hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
				      GET_MODE (ASM_OPERANDS_INPUT (x, i)),
				      do_not_record_p)
			 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
					  (x, i)));

	      hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
	      x = ASM_OPERANDS_INPUT (x, 0);
	      mode = GET_MODE (x);
	      goto repeat;
	    }

	  return hash;
	}

    default:
      break;
    }

  hash += (unsigned) code + (unsigned) GET_MODE (x);
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, i);
	      goto repeat;
	    }

	  hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
	  if (*do_not_record_p)
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  {
	    hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
	    if (*do_not_record_p)
	      return 0;
	  }
      else if (fmt[i] == 's')
	hash += hash_string_1 (XSTR (x, i));
      else if (fmt[i] == 'i')
	hash += (unsigned int) XINT (x, i);
      else
	abort ();
    }

  return hash;
}
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This simplifies the PRE copy
   propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}

/* Return non-zero if exp1 is equivalent to exp2.
   ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */

static int
expr_equiv_p (x, y)
     rtx x, y;
{
  register int i, j;
  register enum rtx_code code;
  register const char *fmt;

  if (x == y)
    return 1;

  if (x == 0 || y == 0)
    return x == y;

  code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

    case MEM:
      /* Can't merge two expressions in different alias sets, since we can
	 decide that the expression is transparent in a block when it isn't,
	 due to it being set with the different alias set.  */
      if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
	return 0;
      break;

      /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
      return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
	       && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
	      || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
		  && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));

    case ASM_OPERANDS:
      /* We don't use the generic code below because we want to
	 disregard filename and line numbers.  */

      /* A volatile asm isn't equivalent to any other.  */
      if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
	return 0;

      if (GET_MODE (x) != GET_MODE (y)
	  || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
	  || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
		     ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
	  || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
	  || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
	return 0;

      if (ASM_OPERANDS_INPUT_LENGTH (x))
	{
	  for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
	    if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
				ASM_OPERANDS_INPUT (y, i))
		|| strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
			   ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
	      return 0;
	}

      return 1;

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole thing.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
	{
	case 'e':
	  if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
	    return 0;
	  break;

	case 'E':
	  if (XVECLEN (x, i) != XVECLEN (y, i))
	    return 0;
	  for (j = 0; j < XVECLEN (x, i); j++)
	    if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
	      return 0;
	  break;

	case 's':
	  if (strcmp (XSTR (x, i), XSTR (y, i)))
	    return 0;
	  break;

	case 'i':
	  if (XINT (x, i) != XINT (y, i))
	    return 0;
	  break;

	case 'w':
	  if (XWINT (x, i) != XWINT (y, i))
	    return 0;
	  break;

	case '0':
	  break;

	default:
	  abort ();
	}
    }

  return 1;
}
1695 /* Insert expression X in INSN in the hash table.
1696 If it is already present, record it as the last occurrence in INSN's
1697 basic block.
1699 MODE is the mode of the value X is being stored into.
1700 It is only used if X is a CONST_INT.
1702 ANTIC_P is non-zero if X is an anticipatable expression.
1703 AVAIL_P is non-zero if X is an available expression. */
1705 static void
1706 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1707 rtx x;
1708 enum machine_mode mode;
1709 rtx insn;
1710 int antic_p, avail_p;
1712 int found, do_not_record_p;
1713 unsigned int hash;
1714 struct expr *cur_expr, *last_expr = NULL;
1715 struct occr *antic_occr, *avail_occr;
1716 struct occr *last_occr = NULL;
1718 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1720 /* Do not insert expression in table if it contains volatile operands,
1721 or if hash_expr determines the expression is something we don't want
1722 to or can't handle. */
1723 if (do_not_record_p)
1724 return;
1726 cur_expr = expr_hash_table[hash];
1727 found = 0;
1729 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1731 /* If the expression isn't found, save a pointer to the end of
1732 the list. */
1733 last_expr = cur_expr;
1734 cur_expr = cur_expr->next_same_hash;
1737 if (! found)
1739 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1740 bytes_used += sizeof (struct expr);
1741 if (expr_hash_table[hash] == NULL)
1742 /* This is the first pattern that hashed to this index. */
1743 expr_hash_table[hash] = cur_expr;
1744 else
1745 /* Add EXPR to end of this hash chain. */
1746 last_expr->next_same_hash = cur_expr;
1748 /* Set the fields of the expr element. */
1749 cur_expr->expr = x;
1750 cur_expr->bitmap_index = n_exprs++;
1751 cur_expr->next_same_hash = NULL;
1752 cur_expr->antic_occr = NULL;
1753 cur_expr->avail_occr = NULL;
1756 /* Now record the occurrence(s). */
1757 if (antic_p)
1759 antic_occr = cur_expr->antic_occr;
1761 /* Search for another occurrence in the same basic block. */
1762 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1764 /* If an occurrence isn't found, save a pointer to the end of
1765 the list. */
1766 last_occr = antic_occr;
1767 antic_occr = antic_occr->next;
1770 if (antic_occr)
1771 /* Found another instance of the expression in the same basic block.
1772 Prefer the currently recorded one. We want the first one in the
1773 block and the block is scanned from start to end. */
1774 ; /* nothing to do */
1775 else
1777 /* First occurrence of this expression in this basic block. */
1778 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1779 bytes_used += sizeof (struct occr);
1780 /* First occurrence of this expression in any block? */
1781 if (cur_expr->antic_occr == NULL)
1782 cur_expr->antic_occr = antic_occr;
1783 else
1784 last_occr->next = antic_occr;
1786 antic_occr->insn = insn;
1787 antic_occr->next = NULL;
1791 if (avail_p)
1793 avail_occr = cur_expr->avail_occr;
1795 /* Search for another occurrence in the same basic block. */
1796 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1798 /* If an occurrence isn't found, save a pointer to the end of
1799 the list. */
1800 last_occr = avail_occr;
1801 avail_occr = avail_occr->next;
1804 if (avail_occr)
1805 /* Found another instance of the expression in the same basic block.
1806 Prefer this occurrence to the currently recorded one. We want
1807 the last one in the block and the block is scanned from start
1808 to end. */
1809 avail_occr->insn = insn;
1810 else
1812 /* First occurrence of this expression in this basic block. */
1813 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1814 bytes_used += sizeof (struct occr);
1816 /* First occurrence of this expression in any block? */
1817 if (cur_expr->avail_occr == NULL)
1818 cur_expr->avail_occr = avail_occr;
1819 else
1820 last_occr->next = avail_occr;
1822 avail_occr->insn = insn;
1823 avail_occr->next = NULL;
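/* A minimal standalone sketch (not part of gcse.c) of the chain-insert
   idiom used by insert_expr_in_table above: walk the `next_same_hash'
   chain with a trailing pointer so that a miss can append at the tail
   without a second traversal.  The string-keyed table and the sk_*
   names below are hypothetical simplifications.  */

#include <stdlib.h>
#include <string.h>

struct sk_entry
{
  const char *key;
  struct sk_entry *next_same_hash;
};

static struct sk_entry *sk_table[127];

static unsigned
sk_hash (const char *key)
{
  unsigned h = 0;
  while (*key)
    h = h * 31 + (unsigned char) *key++;
  return h % 127;
}

/* Return the entry for KEY, appending a new one to the tail of its
   hash chain if none exists yet.  */
static struct sk_entry *
sk_intern (const char *key)
{
  unsigned hash = sk_hash (key);
  struct sk_entry *cur = sk_table[hash], *last = NULL;

  while (cur && strcmp (cur->key, key) != 0)
    {
      /* Not found yet; remember the tail of the chain.  */
      last = cur;
      cur = cur->next_same_hash;
    }

  if (cur == NULL)
    {
      cur = (struct sk_entry *) calloc (1, sizeof (struct sk_entry));
      cur->key = key;
      if (sk_table[hash] == NULL)
	/* First entry that hashed to this bucket.  */
	sk_table[hash] = cur;
      else
	/* Append to the end of this hash chain.  */
	last->next_same_hash = cur;
    }

  return cur;
}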
1828 /* Insert pattern X in INSN in the hash table.
1829 X is a SET of a reg to either another reg or a constant.
1830 If it is already present, record it as the last occurrence in INSN's
1831 basic block. */
1833 static void
1834 insert_set_in_table (x, insn)
1835 rtx x;
1836 rtx insn;
1838 int found;
1839 unsigned int hash;
1840 struct expr *cur_expr, *last_expr = NULL;
1841 struct occr *cur_occr, *last_occr = NULL;
1843 if (GET_CODE (x) != SET
1844 || GET_CODE (SET_DEST (x)) != REG)
1845 abort ();
1847 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1849 cur_expr = set_hash_table[hash];
1850 found = 0;
1852 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1854 /* If the expression isn't found, save a pointer to the end of
1855 the list. */
1856 last_expr = cur_expr;
1857 cur_expr = cur_expr->next_same_hash;
1860 if (! found)
1862 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1863 bytes_used += sizeof (struct expr);
1864 if (set_hash_table[hash] == NULL)
1865 /* This is the first pattern that hashed to this index. */
1866 set_hash_table[hash] = cur_expr;
1867 else
1868 /* Add EXPR to end of this hash chain. */
1869 last_expr->next_same_hash = cur_expr;
1871 /* Set the fields of the expr element.
1872 We must copy X because it can be modified when copy propagation is
1873 performed on its operands. */
1874 /* ??? Should this go in a different obstack? */
1875 cur_expr->expr = copy_rtx (x);
1876 cur_expr->bitmap_index = n_sets++;
1877 cur_expr->next_same_hash = NULL;
1878 cur_expr->antic_occr = NULL;
1879 cur_expr->avail_occr = NULL;
1882 /* Now record the occurrence. */
1883 cur_occr = cur_expr->avail_occr;
1885 /* Search for another occurrence in the same basic block. */
1886 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1888 /* If an occurrence isn't found, save a pointer to the end of
1889 the list. */
1890 last_occr = cur_occr;
1891 cur_occr = cur_occr->next;
1894 if (cur_occr)
1895 /* Found another instance of the expression in the same basic block.
1896 Prefer this occurrence to the currently recorded one. We want the
1897 last one in the block and the block is scanned from start to end. */
1898 cur_occr->insn = insn;
1899 else
1901 /* First occurrence of this expression in this basic block. */
1902 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1903 bytes_used += sizeof (struct occr);
1905 /* First occurrence of this expression in any block? */
1906 if (cur_expr->avail_occr == NULL)
1907 cur_expr->avail_occr = cur_occr;
1908 else
1909 last_occr->next = cur_occr;
1911 cur_occr->insn = insn;
1912 cur_occr->next = NULL;
1916 /* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
1917 non-zero, this is for the assignment hash table, otherwise it is for the
1918 expression hash table. */
1920 static void
1921 hash_scan_set (pat, insn, set_p)
1922 rtx pat, insn;
1923 int set_p;
1925 rtx src = SET_SRC (pat);
1926 rtx dest = SET_DEST (pat);
1928 if (GET_CODE (src) == CALL)
1929 hash_scan_call (src, insn);
1931 if (GET_CODE (dest) == REG)
1933 int regno = REGNO (dest);
1934 rtx tmp;
1936 /* Only record sets of pseudo-regs in the hash table. */
1937 if (! set_p
1938 && regno >= FIRST_PSEUDO_REGISTER
1939 /* Don't GCSE something if we can't do a reg/reg copy. */
1940 && can_copy_p [GET_MODE (dest)]
1941 /* Is SET_SRC something we want to gcse? */
1942 && want_to_gcse_p (src))
1944 /* An expression is not anticipatable if its operands are
1945 modified before this insn. */
1946 int antic_p = oprs_anticipatable_p (src, insn);
1947 /* An expression is not available if its operands are
1948 subsequently modified, including this insn. */
1949 int avail_p = oprs_available_p (src, insn);
1951 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1954 /* Record sets for constant/copy propagation. */
1955 else if (set_p
1956 && regno >= FIRST_PSEUDO_REGISTER
1957 && ((GET_CODE (src) == REG
1958 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1959 && can_copy_p [GET_MODE (dest)])
1960 || GET_CODE (src) == CONST_INT
1961 || GET_CODE (src) == SYMBOL_REF
1962 || GET_CODE (src) == CONST_DOUBLE)
1963 /* A copy is not available if its src or dest is subsequently
1964 modified. Here we want to search from INSN+1 on, but
1965 oprs_available_p searches from INSN on. */
1966 && (insn == BLOCK_END (BLOCK_NUM (insn))
1967 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1968 && oprs_available_p (pat, tmp))))
1969 insert_set_in_table (pat, insn);
1973 static void
1974 hash_scan_clobber (x, insn)
1975 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1977 /* Currently nothing to do. */
1980 static void
1981 hash_scan_call (x, insn)
1982 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1984 /* Currently nothing to do. */
1987 /* Process INSN and add hash table entries as appropriate.
1989 Only available expressions that set a single pseudo-reg are recorded.
1991 Single sets in a PARALLEL could be handled, but it's an extra complication
1992 that isn't dealt with right now. The trick is handling the CLOBBERs that
1993 are also in the PARALLEL. Later.
1995 If SET_P is non-zero, this is for the assignment hash table,
1996 otherwise it is for the expression hash table.
1997 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should
1998 not record any expressions. */
2000 static void
2001 hash_scan_insn (insn, set_p, in_libcall_block)
2002 rtx insn;
2003 int set_p;
2004 int in_libcall_block;
2006 rtx pat = PATTERN (insn);
2007 int i;
2009 /* Pick out the sets of INSN and for other forms of instructions record
2010 what's been modified. */
2012 if (GET_CODE (pat) == SET && ! in_libcall_block)
2014 /* Ignore obvious no-ops. */
2015 if (SET_SRC (pat) != SET_DEST (pat))
2016 hash_scan_set (pat, insn, set_p);
2018 else if (GET_CODE (pat) == PARALLEL)
2019 for (i = 0; i < XVECLEN (pat, 0); i++)
2021 rtx x = XVECEXP (pat, 0, i);
2023 if (GET_CODE (x) == SET)
2025 if (GET_CODE (SET_SRC (x)) == CALL)
2026 hash_scan_call (SET_SRC (x), insn);
2028 else if (GET_CODE (x) == CLOBBER)
2029 hash_scan_clobber (x, insn);
2030 else if (GET_CODE (x) == CALL)
2031 hash_scan_call (x, insn);
2034 else if (GET_CODE (pat) == CLOBBER)
2035 hash_scan_clobber (pat, insn);
2036 else if (GET_CODE (pat) == CALL)
2037 hash_scan_call (pat, insn);
2040 static void
2041 dump_hash_table (file, name, table, table_size, total_size)
2042 FILE *file;
2043 const char *name;
2044 struct expr **table;
2045 int table_size, total_size;
2047 int i;
2048 /* Flattened out table, so it's printed in proper order. */
2049 struct expr **flat_table;
2050 unsigned int *hash_val;
2051 struct expr *expr;
2053 flat_table
2054 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2055 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2057 for (i = 0; i < table_size; i++)
2058 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2060 flat_table[expr->bitmap_index] = expr;
2061 hash_val[expr->bitmap_index] = i;
2064 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2065 name, table_size, total_size);
2067 for (i = 0; i < total_size; i++)
2068 if (flat_table[i] != 0)
2070 expr = flat_table[i];
2071 fprintf (file, "Index %d (hash value %d)\n ",
2072 expr->bitmap_index, hash_val[i]);
2073 print_rtl (file, expr->expr);
2074 fprintf (file, "\n");
2077 fprintf (file, "\n");
2079 free (flat_table);
2080 free (hash_val);
2083 /* Record register first/last/block set information for REGNO in INSN.
2085 reg_first_set records the first place in the block where the register
2086 is set and is used to compute "anticipatability".
2088 reg_last_set records the last place in the block where the register
2089 is set and is used to compute "availability".
2091 reg_set_in_block records whether the register is set in the block
2092 and is used to compute "transparency". */
2094 static void
2095 record_last_reg_set_info (insn, regno)
2096 rtx insn;
2097 int regno;
2099 if (reg_first_set[regno] == NEVER_SET)
2100 reg_first_set[regno] = INSN_CUID (insn);
2102 reg_last_set[regno] = INSN_CUID (insn);
2103 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
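/* A standalone sketch (not part of gcse.c) of what the first/last CUIDs
   recorded above buy us: within one block, "not set before insn" and
   "not set at or after insn" reduce to two integer comparisons.  The
   sketch_* names and the 16-register limit are hypothetical.  */

#define SKETCH_NEVER_SET -1
#define SKETCH_NREGS 16

static int sketch_first_set[SKETCH_NREGS];
static int sketch_last_set[SKETCH_NREGS];

/* Reset at each block boundary, as compute_hash_table does below.  */
static void
sketch_start_block (void)
{
  int i;

  for (i = 0; i < SKETCH_NREGS; i++)
    sketch_first_set[i] = sketch_last_set[i] = SKETCH_NEVER_SET;
}

static void
sketch_record_set (int regno, int cuid)
{
  if (sketch_first_set[regno] == SKETCH_NEVER_SET)
    sketch_first_set[regno] = cuid;
  sketch_last_set[regno] = cuid;
}

/* Anticipatability: REGNO is not set earlier in the block than CUID.  */
static int
sketch_unchanged_before_p (int regno, int cuid)
{
  return sketch_first_set[regno] == SKETCH_NEVER_SET
	 || sketch_first_set[regno] >= cuid;
}

/* Availability: REGNO is not set at or after CUID in the block.  */
static int
sketch_unchanged_after_p (int regno, int cuid)
{
  return sketch_last_set[regno] == SKETCH_NEVER_SET
	 || sketch_last_set[regno] < cuid;
}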
2106 /* Record memory first/last/block set information for INSN. */
2108 static void
2109 record_last_mem_set_info (insn)
2110 rtx insn;
2112 if (mem_first_set == NEVER_SET)
2113 mem_first_set = INSN_CUID (insn);
2115 mem_last_set = INSN_CUID (insn);
2116 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2119 /* Called from compute_hash_table via note_stores to handle one
2120 SET or CLOBBER in an insn. DATA is really the instruction in which
2121 the SET is taking place. */
2123 static void
2124 record_last_set_info (dest, setter, data)
2125 rtx dest, setter ATTRIBUTE_UNUSED;
2126 void *data;
2128 rtx last_set_insn = (rtx) data;
2130 if (GET_CODE (dest) == SUBREG)
2131 dest = SUBREG_REG (dest);
2133 if (GET_CODE (dest) == REG)
2134 record_last_reg_set_info (last_set_insn, REGNO (dest));
2135 else if (GET_CODE (dest) == MEM
2136 /* Ignore pushes, they clobber nothing. */
2137 && ! push_operand (dest, GET_MODE (dest)))
2138 record_last_mem_set_info (last_set_insn);
2141 /* Top level function to create an expression or assignment hash table.
2143 Expression entries are placed in the hash table if
2144 - they are of the form (set (pseudo-reg) src),
2145 - src is something we want to perform GCSE on,
2146 - none of the operands are subsequently modified in the block
2148 Assignment entries are placed in the hash table if
2149 - they are of the form (set (pseudo-reg) src),
2150 - src is something we want to perform const/copy propagation on,
2151 - none of the operands or target are subsequently modified in the block
2153 Currently src must be a pseudo-reg or a const_int.
2155 F is the first insn.
2156 SET_P is non-zero for computing the assignment hash table. */
2158 static void
2159 compute_hash_table (set_p)
2160 int set_p;
2162 int bb;
2164 /* While we compute the hash table we also compute a bit array of which
2165 registers are set in which blocks.
2166 We also compute which blocks set memory, in the absence of aliasing
2167 support [which is TODO].
2168 ??? This isn't needed during const/copy propagation, but it's cheap to
2169 compute. Later. */
2170 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2171 memset ((char *) mem_set_in_block, 0, n_basic_blocks);
2173 /* Some working arrays used to track first and last set in each block. */
2174 /* ??? One could use alloca here, but at some size a threshold is crossed
2175 beyond which one should use malloc. Are we at that threshold here? */
2176 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2177 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2179 for (bb = 0; bb < n_basic_blocks; bb++)
2181 rtx insn;
2182 unsigned int regno;
2183 int in_libcall_block;
2184 unsigned int i;
2186 /* First pass over the instructions records information used to
2187 determine when registers and memory are first and last set.
2188 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2189 could be moved to compute_sets since they currently don't change. */
2191 for (i = 0; i < max_gcse_regno; i++)
2192 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2194 mem_first_set = NEVER_SET;
2195 mem_last_set = NEVER_SET;
2197 for (insn = BLOCK_HEAD (bb);
2198 insn && insn != NEXT_INSN (BLOCK_END (bb));
2199 insn = NEXT_INSN (insn))
2201 #ifdef NON_SAVING_SETJMP
2202 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2203 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2205 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2206 record_last_reg_set_info (insn, regno);
2207 continue;
2209 #endif
2211 if (! INSN_P (insn))
2212 continue;
2214 if (GET_CODE (insn) == CALL_INSN)
2216 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2217 if ((call_used_regs[regno]
2218 && regno != STACK_POINTER_REGNUM
2219 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2220 && regno != HARD_FRAME_POINTER_REGNUM
2221 #endif
2222 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2223 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2224 #endif
2225 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2226 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2227 #endif
2229 && regno != FRAME_POINTER_REGNUM)
2230 || global_regs[regno])
2231 record_last_reg_set_info (insn, regno);
2233 if (! CONST_CALL_P (insn))
2234 record_last_mem_set_info (insn);
2237 note_stores (PATTERN (insn), record_last_set_info, insn);
2240 /* The next pass builds the hash table. */
2242 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2243 insn && insn != NEXT_INSN (BLOCK_END (bb));
2244 insn = NEXT_INSN (insn))
2245 if (INSN_P (insn))
2247 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2248 in_libcall_block = 1;
2249 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2250 in_libcall_block = 0;
2251 hash_scan_insn (insn, set_p, in_libcall_block);
2255 free (reg_first_set);
2256 free (reg_last_set);
2258 /* Catch bugs early. */
2259 reg_first_set = reg_last_set = 0;
2262 /* Allocate space for the set hash table.
2263 N_INSNS is the number of instructions in the function.
2264 It is used to determine the number of buckets to use. */
2266 static void
2267 alloc_set_hash_table (n_insns)
2268 int n_insns;
2270 int n;
2272 set_hash_table_size = n_insns / 4;
2273 if (set_hash_table_size < 11)
2274 set_hash_table_size = 11;
2276 /* Attempt to maintain efficient use of hash table.
2277 Making it an odd number is simplest for now.
2278 ??? Later take some measurements. */
2279 set_hash_table_size |= 1;
2280 n = set_hash_table_size * sizeof (struct expr *);
2281 set_hash_table = (struct expr **) gmalloc (n);
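/* The sizing heuristic above, isolated as a standalone sketch (the
   function name is hypothetical).  Note that `| 1' only guarantees an
   odd bucket count, not a prime one; the ??? comment above leaves
   measuring whether that matters for later.  */

static int
sketch_hash_table_size (int n_insns, int divisor)
{
  /* DIVISOR is 4 for the set table and 2 for the expression table.  */
  int size = n_insns / divisor;

  if (size < 11)
    size = 11;			/* keep a usable minimum */

  return size | 1;		/* force an odd number of buckets */
}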
2284 /* Free things allocated by alloc_set_hash_table. */
2286 static void
2287 free_set_hash_table ()
2289 free (set_hash_table);
2292 /* Compute the hash table for doing copy/const propagation. */
2294 static void
2295 compute_set_hash_table ()
2297 /* Initialize count of number of entries in hash table. */
2298 n_sets = 0;
2299 memset ((char *) set_hash_table, 0,
2300 set_hash_table_size * sizeof (struct expr *));
2302 compute_hash_table (1);
2305 /* Allocate space for the expression hash table.
2306 N_INSNS is the number of instructions in the function.
2307 It is used to determine the number of buckets to use. */
2309 static void
2310 alloc_expr_hash_table (n_insns)
2311 unsigned int n_insns;
2313 int n;
2315 expr_hash_table_size = n_insns / 2;
2316 /* Make sure the amount is usable. */
2317 if (expr_hash_table_size < 11)
2318 expr_hash_table_size = 11;
2320 /* Attempt to maintain efficient use of hash table.
2321 Making it an odd number is simplest for now.
2322 ??? Later take some measurements. */
2323 expr_hash_table_size |= 1;
2324 n = expr_hash_table_size * sizeof (struct expr *);
2325 expr_hash_table = (struct expr **) gmalloc (n);
2328 /* Free things allocated by alloc_expr_hash_table. */
2330 static void
2331 free_expr_hash_table ()
2333 free (expr_hash_table);
2336 /* Compute the hash table for doing GCSE. */
2338 static void
2339 compute_expr_hash_table ()
2341 /* Initialize count of number of entries in hash table. */
2342 n_exprs = 0;
2343 memset ((char *) expr_hash_table, 0,
2344 expr_hash_table_size * sizeof (struct expr *));
2346 compute_hash_table (0);
2349 /* Expression tracking support. */
2351 /* Lookup pattern PAT in the expression table.
2352 The result is a pointer to the table entry, or NULL if not found. */
2354 static struct expr *
2355 lookup_expr (pat)
2356 rtx pat;
2358 int do_not_record_p;
2359 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2360 expr_hash_table_size);
2361 struct expr *expr;
2363 if (do_not_record_p)
2364 return NULL;
2366 expr = expr_hash_table[hash];
2368 while (expr && ! expr_equiv_p (expr->expr, pat))
2369 expr = expr->next_same_hash;
2371 return expr;
2374 /* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2375 matches it, otherwise return the first entry for REGNO. The result is a
2376 pointer to the table entry, or NULL if not found. */
2378 static struct expr *
2379 lookup_set (regno, pat)
2380 unsigned int regno;
2381 rtx pat;
2383 unsigned int hash = hash_set (regno, set_hash_table_size);
2384 struct expr *expr;
2386 expr = set_hash_table[hash];
2388 if (pat)
2390 while (expr && ! expr_equiv_p (expr->expr, pat))
2391 expr = expr->next_same_hash;
2393 else
2395 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2396 expr = expr->next_same_hash;
2399 return expr;
2402 /* Return the next entry for REGNO in list EXPR. */
2404 static struct expr *
2405 next_set (regno, expr)
2406 unsigned int regno;
2407 struct expr *expr;
2409 do
2410 expr = expr->next_same_hash;
2411 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2413 return expr;
2416 /* Reset tables used to keep track of what's still available [since the
2417 start of the block]. */
2419 static void
2420 reset_opr_set_tables ()
2422 /* Maintain a bitmap of which regs have been set since beginning of
2423 the block. */
2424 sbitmap_zero (reg_set_bitmap);
2426 /* Also keep a record of the last instruction to modify memory.
2427 For now this is very trivial, we only record whether any memory
2428 location has been modified. */
2429 mem_last_set = 0;
2432 /* Return non-zero if the operands of X are not set before INSN in
2433 INSN's basic block. */
2435 static int
2436 oprs_not_set_p (x, insn)
2437 rtx x, insn;
2439 int i, j;
2440 enum rtx_code code;
2441 const char *fmt;
2443 if (x == 0)
2444 return 1;
2446 code = GET_CODE (x);
2447 switch (code)
2449 case PC:
2450 case CC0:
2451 case CONST:
2452 case CONST_INT:
2453 case CONST_DOUBLE:
2454 case SYMBOL_REF:
2455 case LABEL_REF:
2456 case ADDR_VEC:
2457 case ADDR_DIFF_VEC:
2458 return 1;
2460 case MEM:
2461 if (mem_last_set != 0)
2462 return 0;
2463 else
2464 return oprs_not_set_p (XEXP (x, 0), insn);
2466 case REG:
2467 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2469 default:
2470 break;
2473 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2475 if (fmt[i] == 'e')
2477 /* If we are about to do the last recursive call
2478 needed at this level, change it into iteration.
2479 This function is called enough to be worth it. */
2480 if (i == 0)
2481 return oprs_not_set_p (XEXP (x, i), insn);
2483 if (! oprs_not_set_p (XEXP (x, i), insn))
2484 return 0;
2486 else if (fmt[i] == 'E')
2487 for (j = 0; j < XVECLEN (x, i); j++)
2488 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2489 return 0;
2492 return 1;
2495 /* Mark things set by a CALL. */
2497 static void
2498 mark_call (insn)
2499 rtx insn;
2501 mem_last_set = INSN_CUID (insn);
2504 /* Mark things set by a SET. */
2506 static void
2507 mark_set (pat, insn)
2508 rtx pat, insn;
2510 rtx dest = SET_DEST (pat);
2512 while (GET_CODE (dest) == SUBREG
2513 || GET_CODE (dest) == ZERO_EXTRACT
2514 || GET_CODE (dest) == SIGN_EXTRACT
2515 || GET_CODE (dest) == STRICT_LOW_PART)
2516 dest = XEXP (dest, 0);
2518 if (GET_CODE (dest) == REG)
2519 SET_BIT (reg_set_bitmap, REGNO (dest));
2520 else if (GET_CODE (dest) == MEM)
2521 mem_last_set = INSN_CUID (insn);
2523 if (GET_CODE (SET_SRC (pat)) == CALL)
2524 mark_call (insn);
2527 /* Record things set by a CLOBBER. */
2529 static void
2530 mark_clobber (pat, insn)
2531 rtx pat, insn;
2533 rtx clob = XEXP (pat, 0);
2535 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2536 clob = XEXP (clob, 0);
2538 if (GET_CODE (clob) == REG)
2539 SET_BIT (reg_set_bitmap, REGNO (clob));
2540 else
2541 mem_last_set = INSN_CUID (insn);
2544 /* Record things set by INSN.
2545 This data is used by oprs_not_set_p. */
2547 static void
2548 mark_oprs_set (insn)
2549 rtx insn;
2551 rtx pat = PATTERN (insn);
2552 int i;
2554 if (GET_CODE (pat) == SET)
2555 mark_set (pat, insn);
2556 else if (GET_CODE (pat) == PARALLEL)
2557 for (i = 0; i < XVECLEN (pat, 0); i++)
2559 rtx x = XVECEXP (pat, 0, i);
2561 if (GET_CODE (x) == SET)
2562 mark_set (x, insn);
2563 else if (GET_CODE (x) == CLOBBER)
2564 mark_clobber (x, insn);
2565 else if (GET_CODE (x) == CALL)
2566 mark_call (insn);
2569 else if (GET_CODE (pat) == CLOBBER)
2570 mark_clobber (pat, insn);
2571 else if (GET_CODE (pat) == CALL)
2572 mark_call (insn);
2576 /* Classic GCSE reaching definition support. */
2578 /* Allocate reaching def variables. */
2580 static void
2581 alloc_rd_mem (n_blocks, n_insns)
2582 int n_blocks, n_insns;
2584 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2585 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2587 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2588 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2590 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2591 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2593 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2594 sbitmap_vector_zero (rd_out, n_basic_blocks);
2597 /* Free reaching def variables. */
2599 static void
2600 free_rd_mem ()
2602 free (rd_kill);
2603 free (rd_gen);
2604 free (reaching_defs);
2605 free (rd_out);
2608 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2610 static void
2611 handle_rd_kill_set (insn, regno, bb)
2612 rtx insn;
2613 int regno, bb;
2615 struct reg_set *this_reg;
2617 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2618 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2619 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2622 /* Compute the set of kill's for reaching definitions. */
2624 static void
2625 compute_kill_rd ()
2627 int bb, cuid;
2628 int regno, i;
2630 /* For each block
2631 For each set bit in `gen' of the block (i.e each insn which
2632 generates a definition in the block)
2633 Call the reg set by the insn corresponding to that bit regx
2634 Look at the linked list starting at reg_set_table[regx]
2635 For each setting of regx in the linked list, which is not in
2636 this block
2637 Set the bit in `kill' corresponding to that insn. */
2638 for (bb = 0; bb < n_basic_blocks; bb++)
2639 for (cuid = 0; cuid < max_cuid; cuid++)
2640 if (TEST_BIT (rd_gen[bb], cuid))
2642 rtx insn = CUID_INSN (cuid);
2643 rtx pat = PATTERN (insn);
2645 if (GET_CODE (insn) == CALL_INSN)
2647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2649 if ((call_used_regs[regno]
2650 && regno != STACK_POINTER_REGNUM
2651 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2652 && regno != HARD_FRAME_POINTER_REGNUM
2653 #endif
2654 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2655 && ! (regno == ARG_POINTER_REGNUM
2656 && fixed_regs[regno])
2657 #endif
2658 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2659 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2660 #endif
2661 && regno != FRAME_POINTER_REGNUM)
2662 || global_regs[regno])
2663 handle_rd_kill_set (insn, regno, bb);
2667 if (GET_CODE (pat) == PARALLEL)
2669 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2671 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2673 if ((code == SET || code == CLOBBER)
2674 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2675 handle_rd_kill_set (insn,
2676 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2677 bb);
2680 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2681 /* Each setting of this register outside of this block
2682 must be marked in the set of kills in this block. */
2683 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
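/* A standalone sketch (not part of gcse.c) of the kill computation just
   performed: a definition of register R generated in block BB kills
   every definition of R recorded in some other block.  Single-word
   bitsets stand in for the sbitmaps, so N_DEFS must be at most 32; the
   sk_* names and table encoding are hypothetical.  */

static void
sk_compute_kill (unsigned kill[], const unsigned gen[], int n_blocks,
		 const int def_block[], const int def_reg[], int n_defs)
{
  int bb, d, d2;

  for (bb = 0; bb < n_blocks; bb++)
    {
      kill[bb] = 0;
      for (d = 0; d < n_defs; d++)
	if (gen[bb] & (1u << d))
	  /* Definition D is generated in BB; kill the other
	     definitions of the same register made elsewhere.  */
	  for (d2 = 0; d2 < n_defs; d2++)
	    if (def_reg[d2] == def_reg[d] && def_block[d2] != bb)
	      kill[bb] |= 1u << d2;
    }
}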
2687 /* Compute the reaching definitions as in
2688 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2689 Chapter 10. It is the same algorithm as used for computing available
2690 expressions but applied to the gens and kills of reaching definitions. */
2692 static void
2693 compute_rd ()
2695 int bb, changed, passes;
2697 for (bb = 0; bb < n_basic_blocks; bb++)
2698 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2700 passes = 0;
2701 changed = 1;
2702 while (changed)
2704 changed = 0;
2705 for (bb = 0; bb < n_basic_blocks; bb++)
2707 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2708 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2709 reaching_defs[bb], rd_kill[bb]);
2711 passes++;
2714 if (gcse_file)
2715 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
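/* The iteration above, restated as a standalone sketch over single-word
   bitsets and a hypothetical CFG given as predecessor lists.  The
   equations being solved are
       in[b]  = union of out[p] for each predecessor p of b
       out[b] = gen[b] | (in[b] & ~kill[b])
   iterated until no out[b] changes.  */

#define SK_BLOCKS 4

static void
sk_reaching_defs (unsigned out[SK_BLOCKS],
		  const unsigned gen[SK_BLOCKS],
		  const unsigned kill[SK_BLOCKS],
		  const int pred[SK_BLOCKS][SK_BLOCKS],
		  const int n_preds[SK_BLOCKS])
{
  int bb, p, changed = 1;

  for (bb = 0; bb < SK_BLOCKS; bb++)
    out[bb] = gen[bb];		/* start from the gen sets, as above */

  while (changed)
    {
      changed = 0;
      for (bb = 0; bb < SK_BLOCKS; bb++)
	{
	  unsigned in = 0, new_out;

	  for (p = 0; p < n_preds[bb]; p++)
	    in |= out[pred[bb][p]];	/* union over predecessors */

	  new_out = gen[bb] | (in & ~kill[bb]);
	  if (new_out != out[bb])
	    {
	      out[bb] = new_out;
	      changed = 1;
	    }
	}
    }
}

/* Available expressions (compute_available) solve the same recurrence
   with intersection instead of union over the predecessors.  */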
2718 /* Classic GCSE available expression support. */
2720 /* Allocate memory for available expression computation. */
2722 static void
2723 alloc_avail_expr_mem (n_blocks, n_exprs)
2724 int n_blocks, n_exprs;
2726 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2727 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2729 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2730 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2732 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2733 sbitmap_vector_zero (ae_in, n_basic_blocks);
2735 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2736 sbitmap_vector_zero (ae_out, n_basic_blocks);
2739 static void
2740 free_avail_expr_mem ()
2742 free (ae_kill);
2743 free (ae_gen);
2744 free (ae_in);
2745 free (ae_out);
2748 /* Compute the set of available expressions generated in each basic block. */
2750 static void
2751 compute_ae_gen ()
2753 unsigned int i;
2754 struct expr *expr;
2755 struct occr *occr;
2757 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2758 This is all we have to do because an expression is not recorded if it
2759 is not available, and the only expressions we want to work with are the
2760 ones that are recorded. */
2761 for (i = 0; i < expr_hash_table_size; i++)
2762 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2763 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2764 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2767 /* Return non-zero if expression X is killed in BB. */
2769 static int
2770 expr_killed_p (x, bb)
2771 rtx x;
2772 int bb;
2774 int i, j;
2775 enum rtx_code code;
2776 const char *fmt;
2778 if (x == 0)
2779 return 1;
2781 code = GET_CODE (x);
2782 switch (code)
2784 case REG:
2785 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2787 case MEM:
2788 if (mem_set_in_block[bb])
2789 return 1;
2790 else
2791 return expr_killed_p (XEXP (x, 0), bb);
2793 case PC:
2794 case CC0: /*FIXME*/
2795 case CONST:
2796 case CONST_INT:
2797 case CONST_DOUBLE:
2798 case SYMBOL_REF:
2799 case LABEL_REF:
2800 case ADDR_VEC:
2801 case ADDR_DIFF_VEC:
2802 return 0;
2804 default:
2805 break;
2808 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2810 if (fmt[i] == 'e')
2812 /* If we are about to do the last recursive call
2813 needed at this level, change it into iteration.
2814 This function is called enough to be worth it. */
2815 if (i == 0)
2816 return expr_killed_p (XEXP (x, i), bb);
2817 else if (expr_killed_p (XEXP (x, i), bb))
2818 return 1;
2820 else if (fmt[i] == 'E')
2821 for (j = 0; j < XVECLEN (x, i); j++)
2822 if (expr_killed_p (XVECEXP (x, i, j), bb))
2823 return 1;
2826 return 0;
2829 /* Compute the set of available expressions killed in each basic block. */
2831 static void
2832 compute_ae_kill (ae_gen, ae_kill)
2833 sbitmap *ae_gen, *ae_kill;
2835 int bb;
2836 unsigned int i;
2837 struct expr *expr;
2839 for (bb = 0; bb < n_basic_blocks; bb++)
2840 for (i = 0; i < expr_hash_table_size; i++)
2841 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2843 /* Skip EXPR if generated in this block. */
2844 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2845 continue;
2847 if (expr_killed_p (expr->expr, bb))
2848 SET_BIT (ae_kill[bb], expr->bitmap_index);
2852 /* Actually perform the Classic GCSE optimizations. */
2854 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2856 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2857 as a positive reach. We want to do this when there are two computations
2858 of the expression in the block.
2860 VISITED is a pointer to a working buffer for tracking which BB's have
2861 been visited. It is NULL for the top-level call.
2863 We treat reaching expressions that go through blocks containing the same
2864 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2865 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2866 2 as not reaching. The intent is to improve the probability of finding
2867 only one reaching expression and to reduce register lifetimes by picking
2868 the closest such expression. */
2870 static int
2871 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2872 struct occr *occr;
2873 struct expr *expr;
2874 int bb;
2875 int check_self_loop;
2876 char *visited;
2878 edge pred;
2880 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2882 int pred_bb = pred->src->index;
2884 if (visited[pred_bb])
2885 /* This predecessor has already been visited. Nothing to do. */
2886 ;
2887 else if (pred_bb == bb)
2889 /* BB loops on itself. */
2890 if (check_self_loop
2891 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2892 && BLOCK_NUM (occr->insn) == pred_bb)
2893 return 1;
2895 visited[pred_bb] = 1;
2898 /* Ignore this predecessor if it kills the expression. */
2899 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2900 visited[pred_bb] = 1;
2902 /* Does this predecessor generate this expression? */
2903 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2905 /* Is this the occurrence we're looking for?
2906 Note that there's only one generating occurrence per block
2907 so we just need to check the block number. */
2908 if (BLOCK_NUM (occr->insn) == pred_bb)
2909 return 1;
2911 visited[pred_bb] = 1;
2914 /* Neither gen nor kill. */
2915 else
2917 visited[pred_bb] = 1;
2918 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2919 visited))
2921 return 1;
2925 /* All paths have been checked. */
2926 return 0;
2929 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2930 memory allocated for that function is returned. */
2932 static int
2933 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2934 struct occr *occr;
2935 struct expr *expr;
2936 int bb;
2937 int check_self_loop;
2939 int rval;
2940 char *visited = (char *) xcalloc (n_basic_blocks, 1);
2942 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2944 free (visited);
2945 return rval;
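/* The shape of the search above, as a standalone sketch: a depth-first
   walk over predecessor edges with a caller-owned visited array,
   reduced to plain reachability (does some path from block SRC reach
   block DST without crossing a block whose KILL flag is set?).  The
   CFG encoding and sk_* names are hypothetical.  */

#include <string.h>

#define SK_N 8

static int
sk_reaches_work (int dst, int src, const int kill[SK_N],
		 const int pred[SK_N][SK_N], const int n_preds[SK_N],
		 char *visited)
{
  int i;

  for (i = 0; i < n_preds[dst]; i++)
    {
      int p = pred[dst][i];

      if (visited[p])
	continue;		/* this predecessor is already explored */
      visited[p] = 1;

      if (p == src)
	return 1;		/* reached the generating block */
      if (kill[p])
	continue;		/* the expression dies along this path */
      if (sk_reaches_work (p, src, kill, pred, n_preds, visited))
	return 1;
    }

  /* All paths have been checked.  */
  return 0;
}

/* Wrapper owning the scratch buffer, like expr_reaches_here_p above.  */
static int
sk_reaches (int dst, int src, const int kill[SK_N],
	    const int pred[SK_N][SK_N], const int n_preds[SK_N])
{
  char visited[SK_N];

  memset (visited, 0, sizeof visited);
  return sk_reaches_work (dst, src, kill, pred, n_preds, visited);
}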
2948 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2949 If there is more than one such instruction, return NULL.
2951 Called only by handle_avail_expr. */
2953 static rtx
2954 computing_insn (expr, insn)
2955 struct expr *expr;
2956 rtx insn;
2958 int bb = BLOCK_NUM (insn);
2960 if (expr->avail_occr->next == NULL)
2962 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2963 /* The available expression is actually itself
2964 (i.e. a loop in the flow graph) so do nothing. */
2965 return NULL;
2967 /* (FIXME) Case where we found a pattern that was created by
2968 a substitution that took place. */
2969 return expr->avail_occr->insn;
2971 else
2973 /* Pattern is computed more than once.
2974 Search backwards from this insn to see how many of these
2975 computations actually reach this insn. */
2976 struct occr *occr;
2977 rtx insn_computes_expr = NULL;
2978 int can_reach = 0;
2980 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2982 if (BLOCK_NUM (occr->insn) == bb)
2984 /* The expression is generated in this block.
2985 The only time we care about this is when the expression
2986 is generated later in the block [and thus there's a loop].
2987 We let the normal cse pass handle the other cases. */
2988 if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2989 && expr_reaches_here_p (occr, expr, bb, 1))
2991 can_reach++;
2992 if (can_reach > 1)
2993 return NULL;
2995 insn_computes_expr = occr->insn;
2998 else if (expr_reaches_here_p (occr, expr, bb, 0))
3000 can_reach++;
3001 if (can_reach > 1)
3002 return NULL;
3004 insn_computes_expr = occr->insn;
3008 if (insn_computes_expr == NULL)
3009 abort ();
3011 return insn_computes_expr;
3015 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3016 Only called by can_disregard_other_sets. */
3018 static int
3019 def_reaches_here_p (insn, def_insn)
3020 rtx insn, def_insn;
3022 rtx reg;
3024 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3025 return 1;
3027 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3029 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3031 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3032 return 1;
3033 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3034 reg = XEXP (PATTERN (def_insn), 0);
3035 else if (GET_CODE (PATTERN (def_insn)) == SET)
3036 reg = SET_DEST (PATTERN (def_insn));
3037 else
3038 abort ();
3040 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3042 else
3043 return 0;
3046 return 0;
3049 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The
3050 value returned is the number of definitions that reach INSN. Returning a
3051 value of zero means that [maybe] more than one definition reaches INSN and
3052 the caller can't perform whatever optimization it is trying; i.e. it is
3053 always safe to return zero. */
3055 static int
3056 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3057 struct reg_set **addr_this_reg;
3058 rtx insn;
3059 int for_combine;
3061 int number_of_reaching_defs = 0;
3062 struct reg_set *this_reg;
3064 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3065 if (def_reaches_here_p (insn, this_reg->insn))
3067 number_of_reaching_defs++;
3068 /* Ignore parallels for now. */
3069 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3070 return 0;
3072 if (!for_combine
3073 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3074 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3075 SET_SRC (PATTERN (insn)))))
3076 /* A setting of the reg to a different value reaches INSN. */
3077 return 0;
3079 if (number_of_reaching_defs > 1)
3081 /* If in this setting the value the register is being set to is
3082 equal to the previous value the register was set to, and this
3083 setting reaches the insn we are trying to do the substitution
3084 on, then we are OK. */
3085 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3086 return 0;
3087 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3088 SET_SRC (PATTERN (insn))))
3089 return 0;
3092 *addr_this_reg = this_reg;
3095 return number_of_reaching_defs;
3098 /* Expression computed by insn is available and the substitution is legal,
3099 so try to perform the substitution.
3101 The result is non-zero if any changes were made. */
3103 static int
3104 handle_avail_expr (insn, expr)
3105 rtx insn;
3106 struct expr *expr;
3108 rtx pat, insn_computes_expr;
3109 rtx to;
3110 struct reg_set *this_reg;
3111 int found_setting, use_src;
3112 int changed = 0;
3114 /* We only handle the case where one computation of the expression
3115 reaches this instruction. */
3116 insn_computes_expr = computing_insn (expr, insn);
3117 if (insn_computes_expr == NULL)
3118 return 0;
3120 found_setting = 0;
3121 use_src = 0;
3123 /* At this point we know only one computation of EXPR outside of this
3124 block reaches this insn. Now try to find a register that the
3125 expression is computed into. */
3126 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3128 /* This is the case when the available expression that reaches
3129 here has already been handled as an available expression. */
3130 unsigned int regnum_for_replacing
3131 = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3133 /* If the register was created by GCSE we can't use `reg_set_table',
3134 however we know it's set only once. */
3135 if (regnum_for_replacing >= max_gcse_regno
3136 /* If the register the expression is computed into is set only once,
3137 or only one set reaches this insn, we can use it. */
3138 || (((this_reg = reg_set_table[regnum_for_replacing]),
3139 this_reg->next == NULL)
3140 || can_disregard_other_sets (&this_reg, insn, 0)))
3142 use_src = 1;
3143 found_setting = 1;
3147 if (!found_setting)
3149 unsigned int regnum_for_replacing
3150 = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3152 /* This shouldn't happen. */
3153 if (regnum_for_replacing >= max_gcse_regno)
3154 abort ();
3156 this_reg = reg_set_table[regnum_for_replacing];
3158 /* If the register the expression is computed into is set only once,
3159 or only one set reaches this insn, use it. */
3160 if (this_reg->next == NULL
3161 || can_disregard_other_sets (&this_reg, insn, 0))
3162 found_setting = 1;
3165 if (found_setting)
3167 pat = PATTERN (insn);
3168 if (use_src)
3169 to = SET_SRC (PATTERN (insn_computes_expr));
3170 else
3171 to = SET_DEST (PATTERN (insn_computes_expr));
3172 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3174 /* We should be able to ignore the return code from validate_change but
3175 to play it safe we check. */
3176 if (changed)
3178 gcse_subst_count++;
3179 if (gcse_file != NULL)
3181 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3182 INSN_UID (insn));
3183 fprintf (gcse_file, " reg %d %s insn %d\n",
3184 REGNO (to), use_src ? "from" : "set in",
3185 INSN_UID (insn_computes_expr));
3190 /* The register that the expr is computed into is set more than once. */
3191 else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/)
3193 /* Insert an insn after INSN_COMPUTES_EXPR that copies the reg set
3194 there into a new pseudo register; call this new register REGN.
3195 From that point until the end of the basic block, or until the
3196 original register is set again, uses of it can be replaced with REGN. */
3197 rtx new_insn;
3199 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3201 /* Generate the new insn. */
3202 /* ??? If the change fails, we return 0, even though we created
3203 an insn. I think this is ok. */
3204 new_insn
3205 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3206 SET_DEST (PATTERN
3207 (insn_computes_expr))),
3208 insn_computes_expr);
3210 /* Keep block number table up to date. */
3211 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3213 /* Keep register set table up to date. */
3214 record_one_set (REGNO (to), new_insn);
3216 gcse_create_count++;
3217 if (gcse_file != NULL)
3219 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3220 INSN_UID (NEXT_INSN (insn_computes_expr)),
3221 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3222 fprintf (gcse_file, ", computed in insn %d,\n",
3223 INSN_UID (insn_computes_expr));
3224 fprintf (gcse_file, " into newly allocated reg %d\n",
3225 REGNO (to));
3228 pat = PATTERN (insn);
3230 /* Do register replacement for INSN. */
3231 changed = validate_change (insn, &SET_SRC (pat),
3232 SET_DEST (PATTERN
3233 (NEXT_INSN (insn_computes_expr))),
3234 0);
3236 /* We should be able to ignore the return code from validate_change but
3237 to play it safe we check. */
3238 if (changed)
3240 gcse_subst_count++;
3241 if (gcse_file != NULL)
3243 fprintf (gcse_file,
3244 "GCSE: Replacing the source in insn %d with reg %d ",
3245 INSN_UID (insn),
3246 REGNO (SET_DEST (PATTERN (NEXT_INSN
3247 (insn_computes_expr)))));
3248 fprintf (gcse_file, "set in insn %d\n",
3249 INSN_UID (insn_computes_expr));
3254 return changed;
3257 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3258 the dataflow analysis has been done.
3260 The result is non-zero if a change was made. */
3262 static int
3263 classic_gcse ()
3265 int bb, changed;
3266 rtx insn;
3268 /* Note we start at block 1. */
3270 changed = 0;
3271 for (bb = 1; bb < n_basic_blocks; bb++)
3273 /* Reset tables used to keep track of what's still valid [since the
3274 start of the block]. */
3275 reset_opr_set_tables ();
3277 for (insn = BLOCK_HEAD (bb);
3278 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3279 insn = NEXT_INSN (insn))
3281 /* Is insn of form (set (pseudo-reg) ...)? */
3282 if (GET_CODE (insn) == INSN
3283 && GET_CODE (PATTERN (insn)) == SET
3284 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3285 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3287 rtx pat = PATTERN (insn);
3288 rtx src = SET_SRC (pat);
3289 struct expr *expr;
3291 if (want_to_gcse_p (src)
3292 /* Is the expression recorded? */
3293 && ((expr = lookup_expr (src)) != NULL)
3294 /* Is the expression available [at the start of the
3295 block]? */
3296 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3297 /* Are the operands unchanged since the start of the
3298 block? */
3299 && oprs_not_set_p (src, insn))
3300 changed |= handle_avail_expr (insn, expr);
3303 /* Keep track of everything modified by this insn. */
3304 /* ??? Need to be careful w.r.t. mods done to INSN. */
3305 if (INSN_P (insn))
3306 mark_oprs_set (insn);
3310 return changed;
3313 /* Top level routine to perform one classic GCSE pass.
3315 Return non-zero if a change was made. */
3317 static int
3318 one_classic_gcse_pass (pass)
3319 int pass;
3321 int changed = 0;
3323 gcse_subst_count = 0;
3324 gcse_create_count = 0;
3326 alloc_expr_hash_table (max_cuid);
3327 alloc_rd_mem (n_basic_blocks, max_cuid);
3328 compute_expr_hash_table ();
3329 if (gcse_file)
3330 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3331 expr_hash_table_size, n_exprs);
3333 if (n_exprs > 0)
3335 compute_kill_rd ();
3336 compute_rd ();
3337 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3338 compute_ae_gen ();
3339 compute_ae_kill (ae_gen, ae_kill);
3340 compute_available (ae_gen, ae_kill, ae_out, ae_in);
3341 changed = classic_gcse ();
3342 free_avail_expr_mem ();
3345 free_rd_mem ();
3346 free_expr_hash_table ();
3348 if (gcse_file)
3350 fprintf (gcse_file, "\n");
3351 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3352 current_function_name, pass, bytes_used, gcse_subst_count);
3353 fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3356 return changed;
3359 /* Compute copy/constant propagation working variables. */
3361 /* Local properties of assignments. */
3362 static sbitmap *cprop_pavloc;
3363 static sbitmap *cprop_absaltered;
3365 /* Global properties of assignments (computed from the local properties). */
3366 static sbitmap *cprop_avin;
3367 static sbitmap *cprop_avout;
3369 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3370 basic blocks. N_SETS is the number of sets. */
3372 static void
3373 alloc_cprop_mem (n_blocks, n_sets)
3374 int n_blocks, n_sets;
3376 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3377 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3379 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3380 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3383 /* Free vars used by copy/const propagation. */
3385 static void
3386 free_cprop_mem ()
3388 free (cprop_pavloc);
3389 free (cprop_absaltered);
3390 free (cprop_avin);
3391 free (cprop_avout);
3394 /* For each block, compute whether X is transparent. X is either an
3395 expression or an assignment [though we don't care which, for this context
3396 an assignment is treated as an expression]. For each block where an
3397 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3398 bit in BMAP. */
3400 static void
3401 compute_transp (x, indx, bmap, set_p)
3402 rtx x;
3403 int indx;
3404 sbitmap *bmap;
3405 int set_p;
3407 int bb, i, j;
3408 enum rtx_code code;
3409 reg_set *r;
3410 const char *fmt;
3412 /* repeat is used to turn tail-recursion into iteration since GCC
3413 can't do it when there's no return value. */
3414 repeat:
3416 if (x == 0)
3417 return;
3419 code = GET_CODE (x);
3420 switch (code)
3422 case REG:
3423 if (set_p)
3425 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3427 for (bb = 0; bb < n_basic_blocks; bb++)
3428 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3429 SET_BIT (bmap[bb], indx);
3431 else
3433 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3434 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3437 else
3439 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3441 for (bb = 0; bb < n_basic_blocks; bb++)
3442 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3443 RESET_BIT (bmap[bb], indx);
3445 else
3447 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3448 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3452 return;
3454 case MEM:
3455 if (set_p)
3457 for (bb = 0; bb < n_basic_blocks; bb++)
3458 if (mem_set_in_block[bb])
3459 SET_BIT (bmap[bb], indx);
3461 else
3463 for (bb = 0; bb < n_basic_blocks; bb++)
3464 if (mem_set_in_block[bb])
3465 RESET_BIT (bmap[bb], indx);
3468 x = XEXP (x, 0);
3469 goto repeat;
3471 case PC:
3472 case CC0: /*FIXME*/
3473 case CONST:
3474 case CONST_INT:
3475 case CONST_DOUBLE:
3476 case SYMBOL_REF:
3477 case LABEL_REF:
3478 case ADDR_VEC:
3479 case ADDR_DIFF_VEC:
3480 return;
3482 default:
3483 break;
3486 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3488 if (fmt[i] == 'e')
3490 /* If we are about to do the last recursive call
3491 needed at this level, change it into iteration.
3492 This function is called enough to be worth it. */
3493 if (i == 0)
3495 x = XEXP (x, i);
3496 goto repeat;
3499 compute_transp (XEXP (x, i), indx, bmap, set_p);
3501 else if (fmt[i] == 'E')
3502 for (j = 0; j < XVECLEN (x, i); j++)
3503 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3507 /* Top level routine to do the dataflow analysis needed by copy/const
3508 propagation. */
3510 static void
3511 compute_cprop_data ()
3513 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3514 compute_available (cprop_pavloc, cprop_absaltered,
3515 cprop_avout, cprop_avin);
3518 /* Copy/constant propagation. */
3520 /* Maximum number of register uses in an insn that we handle. */
3521 #define MAX_USES 8
3523 /* Table of uses found in an insn.
3524 Allocated statically to avoid alloc/free complexity and overhead. */
3525 static struct reg_use reg_use_table[MAX_USES];
3527 /* Index into `reg_use_table' while building it. */
3528 static int reg_use_count;
3530 /* Set up a list of register numbers used in INSN. The found uses are stored
3531 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3532 and contains the number of uses in the table upon exit.
3534 ??? If a register appears multiple times we will record it multiple times.
3535 This doesn't hurt anything but it will slow things down. */
3537 static void
3538 find_used_regs (x)
3539 rtx x;
3541 int i, j;
3542 enum rtx_code code;
3543 const char *fmt;
3545 /* repeat is used to turn tail-recursion into iteration since GCC
3546 can't do it when there's no return value. */
3547 repeat:
3549 if (x == 0)
3550 return;
3552 code = GET_CODE (x);
3553 switch (code)
3555 case REG:
3556 if (reg_use_count == MAX_USES)
3557 return;
3559 reg_use_table[reg_use_count].reg_rtx = x;
3560 reg_use_count++;
3561 return;
3563 case MEM:
3564 x = XEXP (x, 0);
3565 goto repeat;
3567 case PC:
3568 case CC0:
3569 case CONST:
3570 case CONST_INT:
3571 case CONST_DOUBLE:
3572 case SYMBOL_REF:
3573 case LABEL_REF:
3574 case CLOBBER:
3575 case ADDR_VEC:
3576 case ADDR_DIFF_VEC:
3577 case ASM_INPUT: /*FIXME*/
3578 return;
3580 case SET:
3581 if (GET_CODE (SET_DEST (x)) == MEM)
3582 find_used_regs (SET_DEST (x));
3583 x = SET_SRC (x);
3584 goto repeat;
3586 default:
3587 break;
3590 /* Recursively scan the operands of this expression. */
3592 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3594 if (fmt[i] == 'e')
3596 /* If we are about to do the last recursive call
3597 needed at this level, change it into iteration.
3598 This function is called enough to be worth it. */
3599 if (i == 0)
3601 x = XEXP (x, 0);
3602 goto repeat;
3605 find_used_regs (XEXP (x, i));
3607 else if (fmt[i] == 'E')
3608 for (j = 0; j < XVECLEN (x, i); j++)
3609 find_used_regs (XVECEXP (x, i, j));
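/* A standalone sketch (hypothetical tree type) of the `goto repeat'
   idiom used by find_used_regs and compute_transp: the final recursive
   call at each level becomes a jump back to the top, so a long right
   spine costs no extra stack frames.  */

struct sk_node
{
  struct sk_node *left, *right;	/* both NULL for a leaf */
};

static int
sk_count_leaves (const struct sk_node *x)
{
  int n = 0;

 repeat:
  if (x == NULL)
    return n;

  if (x->left == NULL && x->right == NULL)
    return n + 1;

  /* Genuine recursion for one child...  */
  n += sk_count_leaves (x->left);

  /* ...but the last call is turned into iteration.  */
  x = x->right;
  goto repeat;
}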
3613 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3614 Returns non-zero if successful. */
3616 static int
3617 try_replace_reg (from, to, insn)
3618 rtx from, to, insn;
3620 rtx note;
3621 rtx src;
3622 int success;
3623 rtx set;
3625 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3627 if (!note)
3628 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3630 /* If this fails we could try to simplify the result of the
3631 replacement and attempt to recognize the simplified insn.
3633 But we need a general simplify_rtx that doesn't have
3634 pass-specific state variables. I'm not aware of one at the moment. */
3636 success = validate_replace_src (from, to, insn);
3637 set = single_set (insn);
3639 /* We've failed to do the replacement. Try to add a REG_EQUAL note so we
3640 do not lose the information. */
3641 if (!success && !note)
3643 if (!set)
3644 return 0;
3646 note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3647 copy_rtx (SET_SRC (set)),
3648 REG_NOTES (insn));
3651 /* Always do the replacement in REG_EQUAL and REG_EQUIV notes. Also
3652 try to simplify them. */
3653 if (note)
3655 rtx simplified;
3657 if (!validate_replace_rtx_subexp (from, to, insn, &XEXP (note, 0)))
3658 abort();
3660 src = XEXP (note, 0);
3662 /* Try to simplify resulting note. */
3663 simplified = simplify_rtx (src);
3664 if (simplified)
3666 src = simplified;
3667 XEXP (note, 0) = src;
3670 /* A REG_EQUAL note may get simplified into a bare register.
3671 We don't allow that. Remove the note. This case ought
3672 not to happen, because the code above ought to synthesize a
3673 reg-reg move, but be on the safe side. */
3674 else if (REG_P (src))
3675 remove_note (insn, note);
3677 return success;
3680 /* Find a set of REGNO that is available on entry to INSN's block. Returns
3681 NULL if no such set is found. */
3683 static struct expr *
3684 find_avail_set (regno, insn)
3685 int regno;
3686 rtx insn;
3688 /* SET1 contains the last set found that can be returned to the caller for
3689 use in a substitution. */
3690 struct expr *set1 = 0;
3692 /* Loops are not possible here. To get a loop we would need two sets
3693 available at the start of the block containing INSN, i.e. we would
3694 need two sets like this available at the start of the block:
3696 (set (reg X) (reg Y))
3697 (set (reg Y) (reg X))
3699 This cannot happen since the set of (reg Y) would have killed the
3700 set of (reg X) making it unavailable at the start of this block. */
3701 while (1)
3703 rtx src;
3704 struct expr *set = lookup_set (regno, NULL_RTX);
3706 /* Find a set that is available at the start of the block
3707 which contains INSN. */
3708 while (set)
3710 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3711 break;
3712 set = next_set (regno, set);
3715 /* If no available set was found we've reached the end of the
3716 (possibly empty) copy chain. */
3717 if (set == 0)
3718 break;
3720 if (GET_CODE (set->expr) != SET)
3721 abort ();
3723 src = SET_SRC (set->expr);
3725 /* We know the set is available.
3726 Now check that SRC is ANTLOC (i.e. none of the source operands
3727 have changed since the start of the block).
3729 If the source operand changed, we may still use it for the next
3730 iteration of this loop, but we may not use it for substitutions. */
3732 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3733 set1 = set;
3735 /* If the source of the set is anything except a register, then
3736 we have reached the end of the copy chain. */
3737 if (GET_CODE (src) != REG)
3738 break;
3740 /* Follow the copy chain, i.e. start another iteration of the loop
3741 and see if we have an available copy into SRC. */
3742 regno = REGNO (src);
3745 /* SET1 holds the last set that was available and anticipatable at
3746 INSN. */
3747 return set1;
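/* The copy-chain walk above, reduced to a standalone sketch.  Assume a
   hypothetical table COPY_OF where copy_of[r] is the register r was
   copied from (or -1 if r is not a copy), and VALID[r] says whether the
   copy into r is available at the block start with its source still
   unchanged.  As in the loop above, an invalid link is still followed,
   but only a valid link updates the candidate; so for `a = b; b = c' a
   use of `a' can be replaced by `c'.  */

static int
sk_follow_copies (int regno, const int copy_of[], const char valid[])
{
  int best = -1;		/* plays the role of SET1 above */

  while (regno >= 0 && copy_of[regno] >= 0)
    {
      if (valid[regno])
	best = copy_of[regno];	/* usable for substitution */

      /* Follow the chain whether or not this link was usable.  */
      regno = copy_of[regno];
    }

  return best;
}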
3750 /* Subroutine of cprop_insn that tries to propagate constants into
3751 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3752 that we can use for substitutions.
3753 REG_USED is the use we will try to replace, SRC is the constant we
3754 will try to substitute for it.
3755 Returns nonzero if a change was made. */
3757 static int
3758 cprop_jump (insn, copy, reg_used, src)
3759 rtx insn, copy;
3760 struct reg_use *reg_used;
3761 rtx src;
3763 rtx set = PATTERN (copy);
3764 rtx temp;
3766 /* Replace the register with the appropriate constant. */
3767 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3769 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3770 GET_MODE (SET_SRC (set)),
3771 GET_MODE (XEXP (SET_SRC (set), 0)),
3772 XEXP (SET_SRC (set), 0),
3773 XEXP (SET_SRC (set), 1),
3774 XEXP (SET_SRC (set), 2));
3776 /* If no simplification can be made, then try the next
3777 register. */
3778 if (temp == 0)
3779 return 0;
3781 SET_SRC (set) = temp;
3783 /* That may have changed the structure of TEMP, so
3784 force it to be re-recognized if it has not turned
3785 into a nop or unconditional jump. */
3787 INSN_CODE (copy) = -1;
3788 if ((SET_DEST (set) == pc_rtx
3789 && (SET_SRC (set) == pc_rtx
3790 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3791 || recog (PATTERN (copy), copy, NULL) >= 0)
3793 /* This has either become an unconditional jump
3794 or a nop-jump. We'd like to delete nop jumps
3795 here, but doing so confuses gcse. So we just
3796 make the replacement and let later passes
3797 sort things out. */
3798 PATTERN (insn) = set;
3799 INSN_CODE (insn) = -1;
3801 /* One less use of the label this insn used to jump to
3802 if we turned this into a NOP jump. */
3803 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3804 --LABEL_NUSES (JUMP_LABEL (insn));
3806 /* If this has turned into an unconditional jump,
3807 then put a barrier after it so that the unreachable
3808 code will be deleted. */
3809 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3810 emit_barrier_after (insn);
3812 run_jump_opt_after_gcse = 1;
3814 const_prop_count++;
3815 if (gcse_file != NULL)
3817 fprintf (gcse_file,
3818 "CONST-PROP: Replacing reg %d in insn %d with constant ",
3819 REGNO (reg_used->reg_rtx), INSN_UID (insn));
3820 print_rtl (gcse_file, src);
3821 fprintf (gcse_file, "\n");
3824 return 1;
3826 return 0;
3829 #ifdef HAVE_cc0
3831 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3832 for machines that have CC0. INSN is a single set that stores into CC0;
3833 the insn following it is a conditional jump. REG_USED is the use we will
3834 try to replace, SRC is the constant we will try to substitute for it.
3835 Returns nonzero if a change was made. */
3837 static int
3838 cprop_cc0_jump (insn, reg_used, src)
3839 rtx insn;
3840 struct reg_use *reg_used;
3841 rtx src;
3843 rtx jump = NEXT_INSN (insn);
3844 rtx copy = copy_rtx (jump);
3845 rtx set = PATTERN (copy);
3847 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3848 substitute into it. */
3849 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3850 if (! cprop_jump (jump, copy, reg_used, src))
3851 return 0;
3853 /* If we succeeded, delete the cc0 setter. */
3854 PUT_CODE (insn, NOTE);
3855 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3856 NOTE_SOURCE_FILE (insn) = 0;
3857 return 1;
3859 #endif
3861 /* Perform constant and copy propagation on INSN.
3862 The result is non-zero if a change was made. */
3864 static int
3865 cprop_insn (insn, alter_jumps)
3866 rtx insn;
3867 int alter_jumps;
3869 struct reg_use *reg_used;
3870 int changed = 0;
3871 rtx note;
3873 /* Only propagate into SETs. Note that a conditional jump is a
3874 SET with pc_rtx as the destination. */
3875 if ((GET_CODE (insn) != INSN
3876 && GET_CODE (insn) != JUMP_INSN)
3877 || GET_CODE (PATTERN (insn)) != SET)
3878 return 0;
3880 reg_use_count = 0;
3881 find_used_regs (PATTERN (insn));
3883 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3884 if (!note)
3885 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3887 /* We may win even when propagating constants into notes. */
3888 if (note)
3889 find_used_regs (XEXP (note, 0));
3891 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3892 reg_used++, reg_use_count--)
3894 unsigned int regno = REGNO (reg_used->reg_rtx);
3895 rtx pat, src;
3896 struct expr *set;
3898 /* Ignore registers created by GCSE.
3899 We do this because ... */
3900 if (regno >= max_gcse_regno)
3901 continue;
3903 /* If the register has already been set in this block, there's
3904 nothing we can do. */
3905 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3906 continue;
3908 /* Find an assignment that sets reg_used and is available
3909 at the start of the block. */
3910 set = find_avail_set (regno, insn);
3911 if (! set)
3912 continue;
3914 pat = set->expr;
3915 /* ??? We might be able to handle PARALLELs. Later. */
3916 if (GET_CODE (pat) != SET)
3917 abort ();
3919 src = SET_SRC (pat);
3921 /* Constant propagation. */
3922 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3923 || GET_CODE (src) == SYMBOL_REF)
3925 /* Handle normal insns first. */
3926 if (GET_CODE (insn) == INSN
3927 && try_replace_reg (reg_used->reg_rtx, src, insn))
3929 changed = 1;
3930 const_prop_count++;
3931 if (gcse_file != NULL)
3933 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3934 regno);
3935 fprintf (gcse_file, "insn %d with constant ",
3936 INSN_UID (insn));
3937 print_rtl (gcse_file, src);
3938 fprintf (gcse_file, "\n");
3941 /* The original insn setting reg_used may or may not now be
3942 deletable. We leave the deletion to flow. */
3945 /* Try to propagate a CONST_INT into a conditional jump.
3946 We're pretty specific about what we will handle in this
3947 code; we can extend it as necessary over time.
3949 Right now the insn in question must look like
3950 (set (pc) (if_then_else ...)) */
3951 else if (alter_jumps
3952 && GET_CODE (insn) == JUMP_INSN
3953 && condjump_p (insn)
3954 && ! simplejump_p (insn))
3955 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3956 #ifdef HAVE_cc0
3957 /* Similar code for machines that use a pair of CC0 setter and
3958 conditional jump insn. */
3959 else if (alter_jumps
3960 && GET_CODE (PATTERN (insn)) == SET
3961 && SET_DEST (PATTERN (insn)) == cc0_rtx
3962 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3963 && condjump_p (NEXT_INSN (insn))
3964 && ! simplejump_p (NEXT_INSN (insn)))
3966 if (cprop_cc0_jump (insn, reg_used, src))
3968 changed = 1;
3969 break;
3972 #endif
3974 else if (GET_CODE (src) == REG
3975 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3976 && REGNO (src) != regno)
3978 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3980 changed = 1;
3981 copy_prop_count++;
3982 if (gcse_file != NULL)
3984 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3985 regno, INSN_UID (insn));
3986 fprintf (gcse_file, " with reg %d\n", REGNO (src));
3989 /* The original insn setting reg_used may or may not now be
3990 deletable. We leave the deletion to flow. */
3991 /* FIXME: If it turns out that the insn isn't deletable,
3992 then we may have unnecessarily extended register lifetimes
3993 and made things worse. */
3998 return changed;
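
/* A sketch of what a successful propagation does, at the source level
   (hypothetical example, not used by the pass).  Constant propagation
   replaces a register use with the constant that reaches it; copy
   propagation replaces it with the older register it was copied from.  */

static int
cprop_effect_example (int c)
{
  int a = 5;         /* the reaching assignment: a := 5 */
  int b = a + c;     /* cprop rewrites this use to: b = 5 + c */
  return b;
}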
4001 /* Forward propagate copies. This includes copies and constants. Return
4002 non-zero if a change was made. */
4004 static int
4005 cprop (alter_jumps)
4006 int alter_jumps;
4008 int bb, changed;
4009 rtx insn;
4011 /* Note we start at block 1; nothing is available at the start of block 0. */
4013 changed = 0;
4014 for (bb = 1; bb < n_basic_blocks; bb++)
4016 /* Reset tables used to keep track of what's still valid [since the
4017 start of the block]. */
4018 reset_opr_set_tables ();
4020 for (insn = BLOCK_HEAD (bb);
4021 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4022 insn = NEXT_INSN (insn))
4024 if (INSN_P (insn))
4026 changed |= cprop_insn (insn, alter_jumps);
4028 /* Keep track of everything modified by this insn. */
4029 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4030 call mark_oprs_set if we turned the insn into a NOTE. */
4031 if (GET_CODE (insn) != NOTE)
4032 mark_oprs_set (insn);
4037 if (gcse_file != NULL)
4038 fprintf (gcse_file, "\n");
4040 return changed;
4043 /* Perform one copy/constant propagation pass.
4044 F is the first insn in the function.
4045 PASS is the pass count. */
4047 static int
4048 one_cprop_pass (pass, alter_jumps)
4049 int pass;
4050 int alter_jumps;
4052 int changed = 0;
4054 const_prop_count = 0;
4055 copy_prop_count = 0;
4057 alloc_set_hash_table (max_cuid);
4058 compute_set_hash_table ();
4059 if (gcse_file)
4060 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4061 n_sets);
4062 if (n_sets > 0)
4064 alloc_cprop_mem (n_basic_blocks, n_sets);
4065 compute_cprop_data ();
4066 changed = cprop (alter_jumps);
4067 free_cprop_mem ();
4070 free_set_hash_table ();
4072 if (gcse_file)
4074 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4075 current_function_name, pass, bytes_used);
4076 fprintf (gcse_file, "%d const props, %d copy props\n\n",
4077 const_prop_count, copy_prop_count);
4080 return changed;
4083 /* Compute PRE+LCM working variables. */
4085 /* Local properties of expressions. */
4086 /* Nonzero for expressions that are transparent in the block. */
4087 static sbitmap *transp;
4089 /* Nonzero for expressions that are transparent at the end of the block.
4090 This is only zero for expressions killed by an abnormal critical
4091 edge created by a call. */
4092 static sbitmap *transpout;
4094 /* Nonzero for expressions that are computed (available) in the block. */
4095 static sbitmap *comp;
4097 /* Nonzero for expressions that are locally anticipatable in the block. */
4098 static sbitmap *antloc;
4100 /* Nonzero for expressions where this block is an optimal computation
4101 point. */
4102 static sbitmap *pre_optimal;
4104 /* Nonzero for expressions which are redundant in a particular block. */
4105 static sbitmap *pre_redundant;
4107 /* Nonzero for expressions which should be inserted on a specific edge. */
4108 static sbitmap *pre_insert_map;
4110 /* Nonzero for expressions which should be deleted in a specific block. */
4111 static sbitmap *pre_delete_map;
4113 /* Contains the edge_list returned by pre_edge_lcm. */
4114 static struct edge_list *edge_list;
4116 /* Redundant insns. */
4117 static sbitmap pre_redundant_insns;
4119 /* Allocate vars used for PRE analysis. */
4121 static void
4122 alloc_pre_mem (n_blocks, n_exprs)
4123 int n_blocks, n_exprs;
4125 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4126 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4127 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4129 pre_optimal = NULL;
4130 pre_redundant = NULL;
4131 pre_insert_map = NULL;
4132 pre_delete_map = NULL;
4133 ae_in = NULL;
4134 ae_out = NULL;
4135 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4137 /* pre_insert and pre_delete are allocated later. */
4140 /* Free vars used for PRE analysis. */
4142 static void
4143 free_pre_mem ()
4145 free (transp);
4146 free (comp);
4148 /* ANTLOC and AE_KILL are freed just after pre_edge_lcm finishes. */
4150 if (pre_optimal)
4151 free (pre_optimal);
4152 if (pre_redundant)
4153 free (pre_redundant);
4154 if (pre_insert_map)
4155 free (pre_insert_map);
4156 if (pre_delete_map)
4157 free (pre_delete_map);
4159 if (ae_in)
4160 free (ae_in);
4161 if (ae_out)
4162 free (ae_out);
4164 transp = comp = NULL;
4165 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4166 ae_in = ae_out = NULL;
4169 /* Top level routine to do the dataflow analysis needed by PRE. */
4171 static void
4172 compute_pre_data ()
4174 sbitmap trapping_expr;
4175 int i;
4176 unsigned int ui;
4178 compute_local_properties (transp, comp, antloc, 0);
4179 sbitmap_vector_zero (ae_kill, n_basic_blocks);
4181 /* Collect expressions which might trap. */
4182 trapping_expr = sbitmap_alloc (n_exprs);
4183 sbitmap_zero (trapping_expr);
4184 for (ui = 0; ui < expr_hash_table_size; ui++)
4186 struct expr *e;
4187 for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
4188 if (may_trap_p (e->expr))
4189 SET_BIT (trapping_expr, e->bitmap_index);
4192 /* Compute ae_kill for each basic block using:
4194 ~(TRANSP | COMP)
4196 This is significantly faster than compute_ae_kill. */
4198 for (i = 0; i < n_basic_blocks; i++)
4200 edge e;
4202 /* If the current block is the destination of an abnormal edge, we
4203 kill all trapping expressions because we won't be able to properly
4204 place the instruction on the edge. So make them neither
4205 anticipatable nor transparent. This is fairly conservative. */
4206 for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
4207 if (e->flags & EDGE_ABNORMAL)
4209 sbitmap_difference (antloc[i], antloc[i], trapping_expr);
4210 sbitmap_difference (transp[i], transp[i], trapping_expr);
4211 break;
4214 sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4215 sbitmap_not (ae_kill[i], ae_kill[i]);
4218 edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4219 ae_kill, &pre_insert_map, &pre_delete_map);
4220 free (antloc);
4221 antloc = NULL;
4222 free (ae_kill);
4223 ae_kill = NULL;
4224 free (trapping_expr);
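
/* Sketch of the AE_KILL identity used above, written out for a single
   bitmap word (hypothetical, self-contained).  An expression is killed
   in a block iff it is neither transparent (operands unchanged across
   the block) nor computed (made available) there.  */

static unsigned long
ae_kill_word_example (unsigned long transp_word, unsigned long comp_word)
{
  return ~(transp_word | comp_word);
}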
4227 /* PRE utilities */
4229 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4230 block BB.
4232 VISITED is a pointer to a working buffer for tracking which BB's have
4233 been visited. It is NULL for the top-level call.
4235 We treat reaching expressions that go through blocks containing the same
4236 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4237 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4238 2 as not reaching. The intent is to improve the probability of finding
4239 only one reaching expression and to reduce register lifetimes by picking
4240 the closest such expression. */
4242 static int
4243 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4244 int occr_bb;
4245 struct expr *expr;
4246 int bb;
4247 char *visited;
4249 edge pred;
4251 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4253 int pred_bb = pred->src->index;
4255 if (pred->src == ENTRY_BLOCK_PTR
4256 /* Has this predecessor already been visited? */
4257 || visited[pred_bb])
4258 ;/* Nothing to do. */
4260 /* Does this predecessor generate this expression? */
4261 else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4263 /* Is this the occurrence we're looking for?
4264 Note that there's only one generating occurrence per block
4265 so we just need to check the block number. */
4266 if (occr_bb == pred_bb)
4267 return 1;
4269 visited[pred_bb] = 1;
4271 /* Ignore this predecessor if it kills the expression. */
4272 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4273 visited[pred_bb] = 1;
4275 /* Neither gen nor kill. */
4276 else
4278 visited[pred_bb] = 1;
4279 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4280 return 1;
4284 /* All paths have been checked. */
4285 return 0;
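
/* Self-contained sketch of the walk above on a toy CFG given as
   predecessor lists.  GEN[] and TRANSP[] stand in for the comp[] and
   transp[] bitmaps; all names here are hypothetical.  A generating
   block that is not OCCR_BB deliberately blocks the path, implementing
   the closest-occurrence rule described above.  */

static int
toy_reaches_p (int occr_bb, int bb, const int *gen, const int *transp,
               const int *const *preds, const int *npreds, char *visited)
{
  int i;

  for (i = 0; i < npreds[bb]; i++)
    {
      int p = preds[bb][i];

      if (visited[p])
        continue;
      visited[p] = 1;

      if (gen[p])
        {
          /* A generating block is either the occurrence we want
             or a blocker.  */
          if (p == occr_bb)
            return 1;
        }
      else if (transp[p]
               && toy_reaches_p (occr_bb, p, gen, transp, preds, npreds,
                                 visited))
        return 1;
      /* Opaque, non-generating predecessors kill the expression.  */
    }

  return 0;
}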
4288 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
4289 memory allocated for that function is returned. */
4291 static int
4292 pre_expr_reaches_here_p (occr_bb, expr, bb)
4293 int occr_bb;
4294 struct expr *expr;
4295 int bb;
4297 int rval;
4298 char *visited = (char *) xcalloc (n_basic_blocks, 1);
4300 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
4302 free (visited);
4303 return rval;
4307 /* Given an expr, generate RTL which we can insert at the end of a BB,
4308 or on an edge. Set the block number of any insns generated to
4309 the value of BB. */
4311 static rtx
4312 process_insert_insn (expr)
4313 struct expr *expr;
4315 rtx reg = expr->reaching_reg;
4316 rtx pat, copied_expr;
4317 rtx first_new_insn;
4319 start_sequence ();
4320 copied_expr = copy_rtx (expr->expr);
4321 emit_move_insn (reg, copied_expr);
4322 first_new_insn = get_insns ();
4323 pat = gen_sequence ();
4324 end_sequence ();
4326 return pat;
4329 /* Add EXPR to the end of basic block BB.
4331 This is used by both PRE and code hoisting.
4333 For PRE, we want to verify that the expr is either transparent
4334 or locally anticipatable in the target block. This check makes
4335 no sense for code hoisting. */
4337 static void
4338 insert_insn_end_bb (expr, bb, pre)
4339 struct expr *expr;
4340 int bb;
4341 int pre;
4343 rtx insn = BLOCK_END (bb);
4344 rtx new_insn;
4345 rtx reg = expr->reaching_reg;
4346 int regno = REGNO (reg);
4347 rtx pat;
4348 int i;
4350 pat = process_insert_insn (expr);
4352 /* If the last insn is a jump, insert EXPR in front [taking care to
4353 handle cc0, etc. properly]. */
4355 if (GET_CODE (insn) == JUMP_INSN)
4357 #ifdef HAVE_cc0
4358 rtx note;
4359 #endif
4361 /* If this is a jump table, then we can't insert stuff here. Since
4362 we know the previous real insn must be the tablejump, we insert
4363 the new instruction just before the tablejump. */
4364 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4365 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4366 insn = prev_real_insn (insn);
4368 #ifdef HAVE_cc0
4369 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4370 if cc0 isn't set. */
4371 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4372 if (note)
4373 insn = XEXP (note, 0);
4374 else
4376 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4377 if (maybe_cc0_setter
4378 && INSN_P (maybe_cc0_setter)
4379 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4380 insn = maybe_cc0_setter;
4382 #endif
4383 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4384 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4387 /* Likewise if the last insn is a call, as will happen in the presence
4388 of exception handling. */
4389 else if (GET_CODE (insn) == CALL_INSN)
4391 HARD_REG_SET parm_regs;
4392 int nparm_regs;
4393 rtx p;
4395 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4396 we search backward and place the instructions before the first
4397 parameter is loaded. Do this for everyone for consistency and on the
4398 presumption that we'll get better code elsewhere as well.
4400 It should always be the case that we can put these instructions
4401 anywhere in the basic block when performing PRE optimizations.
4402 Check this. */
4404 if (pre
4405 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4406 && !TEST_BIT (transp[bb], expr->bitmap_index))
4407 abort ();
4409 /* Since different machines initialize their parameter registers
4410 in different orders, assume nothing. Collect the set of all
4411 parameter registers. */
4412 CLEAR_HARD_REG_SET (parm_regs);
4413 nparm_regs = 0;
4414 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4415 if (GET_CODE (XEXP (p, 0)) == USE
4416 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4418 if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4419 abort ();
4421 SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4422 nparm_regs++;
4425 /* Search backward for the first set of a register in this set. */
4426 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4428 insn = PREV_INSN (insn);
4429 p = single_set (insn);
4430 if (p && GET_CODE (SET_DEST (p)) == REG
4431 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4432 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4434 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4435 nparm_regs--;
4439 /* If we found all the parameter loads, then we want to insert
4440 before the first parameter load.
4442 If we did not find all the parameter loads, then we might have
4443 stopped on the head of the block, which could be a CODE_LABEL.
4444 If we inserted before the CODE_LABEL, then we would be putting
4445 the insn in the wrong basic block. In that case, put the insn
4446 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4447 while (GET_CODE (insn) == CODE_LABEL
4448 || NOTE_INSN_BASIC_BLOCK_P (insn))
4449 insn = NEXT_INSN (insn);
4451 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4453 else
4455 new_insn = emit_insn_after (pat, insn);
4456 BLOCK_END (bb) = new_insn;
4459 /* Keep block number table up to date.
4460 Note, PAT could be a multiple insn sequence, we have to make
4461 sure that each insn in the sequence is handled. */
4462 if (GET_CODE (pat) == SEQUENCE)
4464 for (i = 0; i < XVECLEN (pat, 0); i++)
4466 rtx insn = XVECEXP (pat, 0, i);
4468 set_block_num (insn, bb);
4469 if (INSN_P (insn))
4470 add_label_notes (PATTERN (insn), new_insn);
4472 note_stores (PATTERN (insn), record_set_info, insn);
4475 else
4477 add_label_notes (SET_SRC (pat), new_insn);
4478 set_block_num (new_insn, bb);
4480 /* Keep register set table up to date. */
4481 record_one_set (regno, new_insn);
4484 gcse_create_count++;
4486 if (gcse_file)
4488 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4489 bb, INSN_UID (new_insn));
4490 fprintf (gcse_file, "copying expression %d to reg %d\n",
4491 expr->bitmap_index, regno);
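
/* Sketch of the backward search above, reduced to a bitmask
   (hypothetical, self-contained).  SETREG[i] is the hard register set
   by insn i, or -1.  Starting from the call at index NCALL we walk
   backward clearing parameter-register bits, and return the index of
   the earliest parameter load, i.e. where the new insn belongs.  */

static int
find_param_load_start_example (const int *setreg, int ncall,
                               unsigned int parm_mask)
{
  int i = ncall;

  while (parm_mask != 0 && i > 0)
    {
      i--;
      if (setreg[i] >= 0 && (parm_mask & (1u << setreg[i])) != 0)
        parm_mask &= ~(1u << setreg[i]);
    }

  return i;
}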
4495 /* Insert partially redundant expressions on edges in the CFG to make
4496 the expressions fully redundant. */
4498 static int
4499 pre_edge_insert (edge_list, index_map)
4500 struct edge_list *edge_list;
4501 struct expr **index_map;
4503 int e, i, j, num_edges, set_size, did_insert = 0;
4504 sbitmap *inserted;
4506 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4507 if it reaches any of the deleted expressions. */
4509 set_size = pre_insert_map[0]->size;
4510 num_edges = NUM_EDGES (edge_list);
4511 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4512 sbitmap_vector_zero (inserted, num_edges);
4514 for (e = 0; e < num_edges; e++)
4516 int indx;
4517 basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4518 int bb = pred->index;
4520 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4522 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4524 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4525 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4527 struct expr *expr = index_map[j];
4528 struct occr *occr;
4530 /* Now look at each deleted occurrence of this expression. */
4531 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4533 if (! occr->deleted_p)
4534 continue;
4536 /* Insert this expression on this edge if it would
4537 reach the deleted occurrence in BB. */
4538 if (!TEST_BIT (inserted[e], j))
4540 rtx insn;
4541 edge eg = INDEX_EDGE (edge_list, e);
4543 /* We can't insert anything on an abnormal and
4544 critical edge, so we insert the insn at the end of
4545 the previous block. There are several alternatives
4546 detailed in Morgan's book, p. 277 (sec. 10.5), for
4547 handling this situation. This one is easiest for
4548 now. */
4550 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4551 insert_insn_end_bb (index_map[j], bb, 0);
4552 else
4554 insn = process_insert_insn (index_map[j]);
4555 insert_insn_on_edge (insn, eg);
4558 if (gcse_file)
4560 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4561 bb,
4562 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4563 fprintf (gcse_file, "copy expression %d\n",
4564 expr->bitmap_index);
4567 SET_BIT (inserted[e], j);
4568 did_insert = 1;
4569 gcse_create_count++;
4576 free (inserted);
4577 return did_insert;
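
/* Sketch of the packed-bitmap walk used above (hypothetical,
   self-contained): visit each set bit of a word-array bitmap by
   shifting every word right one bit at a time, exactly as the
   insert >>= 1 loop does with pre_insert_map.  */

static void
foreach_set_bit_example (const unsigned long *words, int nwords, int nbits,
                         void (*fn) (int, void *), void *data)
{
  int i, base;

  for (i = 0, base = 0; i < nwords;
       i++, base += 8 * (int) sizeof (unsigned long))
    {
      unsigned long w = words[i];
      int j;

      for (j = base; w != 0 && j < nbits; j++, w >>= 1)
        if (w & 1)
          fn (j, data);
    }
}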
4580 /* Copy the result of INSN to REG. INDX is the expression number. */
4582 static void
4583 pre_insert_copy_insn (expr, insn)
4584 struct expr *expr;
4585 rtx insn;
4587 rtx reg = expr->reaching_reg;
4588 int regno = REGNO (reg);
4589 int indx = expr->bitmap_index;
4590 rtx set = single_set (insn);
4591 rtx new_insn;
4592 int bb = BLOCK_NUM (insn);
4594 if (!set)
4595 abort ();
4597 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4598 insn);
4600 /* Keep block number table up to date. */
4601 set_block_num (new_insn, bb);
4603 /* Keep register set table up to date. */
4604 record_one_set (regno, new_insn);
4605 if (insn == BLOCK_END (bb))
4606 BLOCK_END (bb) = new_insn;
4608 gcse_create_count++;
4610 if (gcse_file)
4611 fprintf (gcse_file,
4612 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4613 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4614 INSN_UID (insn), regno);
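
/* At the source level, the copy inserted above looks like this
   (hypothetical sketch): the original computation stays put and a
   copy into the reaching register is added right after it.  */

static int
insert_copy_effect_example (int a, int b, int *t)
{
  int x = a + b;   /* original computation, left in place */
  *t = x;          /* inserted: reaching_reg = SET_DEST */
  return x;
}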
4617 /* Copy available expressions that reach the redundant expression
4618 to `reaching_reg'. */
4620 static void
4621 pre_insert_copies ()
4623 unsigned int i;
4624 struct expr *expr;
4625 struct occr *occr;
4626 struct occr *avail;
4628 /* For each available expression in the table, copy the result to
4629 `reaching_reg' if the expression reaches a deleted one.
4631 ??? The current algorithm is rather brute force.
4632 Need to do some profiling. */
4634 for (i = 0; i < expr_hash_table_size; i++)
4635 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4637 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4638 we don't want to insert a copy here because the expression may not
4639 really be redundant. So only insert an insn if the expression was
4640 deleted. This test also avoids further processing if the
4641 expression wasn't deleted anywhere. */
4642 if (expr->reaching_reg == NULL)
4643 continue;
4645 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4647 if (! occr->deleted_p)
4648 continue;
4650 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4652 rtx insn = avail->insn;
4654 /* No need to handle this one if handled already. */
4655 if (avail->copied_p)
4656 continue;
4658 /* Don't handle this one if it's a redundant one. */
4659 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4660 continue;
4662 /* Or if the expression doesn't reach the deleted one. */
4663 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4664 BLOCK_NUM (occr->insn)))
4665 continue;
4667 /* Copy the result of avail to reaching_reg. */
4668 pre_insert_copy_insn (expr, insn);
4669 avail->copied_p = 1;
4675 /* Delete redundant computations.
4676 Deletion is done by changing the insn to copy the `reaching_reg' of
4677 the expression into the result of the SET. It is left to later passes
4678 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4680 Returns non-zero if a change is made. */
4682 static int
4683 pre_delete ()
4685 unsigned int i;
4686 int changed;
4687 struct expr *expr;
4688 struct occr *occr;
4690 changed = 0;
4691 for (i = 0; i < expr_hash_table_size; i++)
4692 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4694 int indx = expr->bitmap_index;
4696 /* We only need to search antic_occr since we require
4697 ANTLOC != 0. */
4699 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4701 rtx insn = occr->insn;
4702 rtx set;
4703 int bb = BLOCK_NUM (insn);
4705 if (TEST_BIT (pre_delete_map[bb], indx))
4707 set = single_set (insn);
4708 if (! set)
4709 abort ();
4711 /* Create a pseudo-reg to store the result of reaching
4712 expressions into. Get the mode for the new pseudo from
4713 the mode of the original destination pseudo. */
4714 if (expr->reaching_reg == NULL)
4715 expr->reaching_reg
4716 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4718 /* In theory this should never fail since we're creating
4719 a reg->reg copy.
4721 However, on the x86 some of the movXX patterns actually
4722 contain clobbers of scratch regs. This may cause the
4723 insn created by validate_change to not match any pattern
4724 and thus cause validate_change to fail. */
4725 if (validate_change (insn, &SET_SRC (set),
4726 expr->reaching_reg, 0))
4728 occr->deleted_p = 1;
4729 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4730 changed = 1;
4731 gcse_subst_count++;
4734 if (gcse_file)
4736 fprintf (gcse_file,
4737 "PRE: redundant insn %d (expression %d) in ",
4738 INSN_UID (insn), indx);
4739 fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4740 bb, REGNO (expr->reaching_reg));
4746 return changed;
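
/* The rewrite pre_delete performs, at the source level (hypothetical
   sketch): a redundant computation becomes a copy from the reaching
   register, which cprop/cse2/flow/combine/regmove may later remove.  */

static int
delete_redundancy_effect_example (int t)
{
  int y = t;   /* was: y = a + b; now a copy from reaching_reg */
  return y;
}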
4749 /* Perform GCSE optimizations using PRE.
4750 This is called by one_pre_gcse_pass after all the dataflow analysis
4751 has been done.
4753 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
4754 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4755 Compiler Design and Implementation.
4757 ??? A new pseudo reg is created to hold the reaching expression. The nice
4758 thing about the classical approach is that it would try to use an existing
4759 reg. If the register can't be adequately optimized [i.e. we introduce
4760 reload problems], one could add a pass here to propagate the new register
4761 through the block.
4763 ??? We don't handle single sets in PARALLELs because we're [currently] not
4764 able to copy the rest of the parallel when we insert copies to create full
4765 redundancies from partial redundancies. However, there's no reason why we
4766 can't handle PARALLELs in the cases where there are no partial
4767 redundancies. */
4769 static int
4770 pre_gcse ()
4772 unsigned int i;
4773 int did_insert, changed;
4774 struct expr **index_map;
4775 struct expr *expr;
4777 /* Compute a mapping from expression number (`bitmap_index') to
4778 hash table entry. */
4780 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4781 for (i = 0; i < expr_hash_table_size; i++)
4782 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4783 index_map[expr->bitmap_index] = expr;
4785 /* Reset bitmap used to track which insns are redundant. */
4786 pre_redundant_insns = sbitmap_alloc (max_cuid);
4787 sbitmap_zero (pre_redundant_insns);
4789 /* Delete the redundant insns first so that
4790 - we know what register to use for the new insns and for the other
4791 ones with reaching expressions
4792 - we know which insns are redundant when we go to create copies */
4794 changed = pre_delete ();
4796 did_insert = pre_edge_insert (edge_list, index_map);
4798 /* In other places with reaching expressions, copy the expression to the
4799 specially allocated pseudo-reg that reaches the redundant expr. */
4800 pre_insert_copies ();
4801 if (did_insert)
4803 commit_edge_insertions ();
4804 changed = 1;
4807 free (index_map);
4808 free (pre_redundant_insns);
4809 return changed;
4812 /* Top level routine to perform one PRE GCSE pass.
4814 Return non-zero if a change was made. */
4816 static int
4817 one_pre_gcse_pass (pass)
4818 int pass;
4820 int changed = 0;
4822 gcse_subst_count = 0;
4823 gcse_create_count = 0;
4825 alloc_expr_hash_table (max_cuid);
4826 add_noreturn_fake_exit_edges ();
4827 compute_expr_hash_table ();
4828 if (gcse_file)
4829 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4830 expr_hash_table_size, n_exprs);
4832 if (n_exprs > 0)
4834 alloc_pre_mem (n_basic_blocks, n_exprs);
4835 compute_pre_data ();
4836 changed |= pre_gcse ();
4837 free_edge_list (edge_list);
4838 free_pre_mem ();
4841 remove_fake_edges ();
4842 free_expr_hash_table ();
4844 if (gcse_file)
4846 fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4847 current_function_name, pass, bytes_used);
4848 fprintf (gcse_file, "%d substs, %d insns created\n",
4849 gcse_subst_count, gcse_create_count);
4852 return changed;
4855 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4856 If notes are added to an insn which references a CODE_LABEL, the
4857 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
4858 because the following loop optimization pass requires them. */
4860 /* ??? This is very similar to the loop.c add_label_notes function. We
4861 could probably share code here. */
4863 /* ??? If there was a jump optimization pass after gcse and before loop,
4864 then we would not need to do this here, because jump would add the
4865 necessary REG_LABEL notes. */
4867 static void
4868 add_label_notes (x, insn)
4869 rtx x;
4870 rtx insn;
4872 enum rtx_code code = GET_CODE (x);
4873 int i, j;
4874 const char *fmt;
4876 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4878 /* This code used to ignore labels that referred to dispatch tables to
4879 avoid flow generating (slightly) worse code.
4881 We no longer ignore such label references (see LABEL_REF handling in
4882 mark_jump_label for additional information). */
4884 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4885 REG_NOTES (insn));
4886 if (LABEL_P (XEXP (x, 0)))
4887 LABEL_NUSES (XEXP (x, 0))++;
4888 return;
4891 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4893 if (fmt[i] == 'e')
4894 add_label_notes (XEXP (x, i), insn);
4895 else if (fmt[i] == 'E')
4896 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4897 add_label_notes (XVECEXP (x, i, j), insn);
4901 /* Compute transparent outgoing information for each block.
4903 An expression is transparent to an edge unless it is killed by
4904 the edge itself. This can only happen with abnormal control flow,
4905 when the edge is traversed through a call. This happens with
4906 non-local labels and exceptions.
4908 This would not be necessary if we split the edge. While this is
4909 normally impossible for abnormal critical edges, with some effort
4910 it should be possible with exception handling, since we still have
4911 control over which handler should be invoked. But due to increased
4912 EH table sizes, this may not be worthwhile. */
4914 static void
4915 compute_transpout ()
4917 int bb;
4918 unsigned int i;
4919 struct expr *expr;
4921 sbitmap_vector_ones (transpout, n_basic_blocks);
4923 for (bb = 0; bb < n_basic_blocks; ++bb)
4925 /* Note that flow inserted a nop at the end of basic blocks that
4926 end in call instructions for reasons other than abnormal
4927 control flow. */
4928 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4929 continue;
4931 for (i = 0; i < expr_hash_table_size; i++)
4932 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4933 if (GET_CODE (expr->expr) == MEM)
4935 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4936 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4937 continue;
4939 /* ??? Optimally, we would use interprocedural alias
4940 analysis to determine if this mem is actually killed
4941 by this call. */
4942 RESET_BIT (transpout[bb], expr->bitmap_index);
4947 /* Removal of useless null pointer checks */
4949 /* Called via note_stores. X is set by SETTER. If X is a register we must
4950 invalidate nonnull_local and set nonnull_killed. DATA is really a
4951 `null_pointer_info *'.
4953 We ignore hard registers. */
4955 static void
4956 invalidate_nonnull_info (x, setter, data)
4957 rtx x;
4958 rtx setter ATTRIBUTE_UNUSED;
4959 void *data;
4961 unsigned int regno;
4962 struct null_pointer_info *npi = (struct null_pointer_info *) data;
4964 while (GET_CODE (x) == SUBREG)
4965 x = SUBREG_REG (x);
4967 /* Ignore anything that is not a register or is a hard register. */
4968 if (GET_CODE (x) != REG
4969 || REGNO (x) < npi->min_reg
4970 || REGNO (x) >= npi->max_reg)
4971 return;
4973 regno = REGNO (x) - npi->min_reg;
4975 RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4976 SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4979 /* Do null-pointer check elimination for the registers indicated in
4980 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4981 they are not our responsibility to free. */
4983 static void
4984 delete_null_pointer_checks_1 (delete_list, block_reg, nonnull_avin,
4985 nonnull_avout, npi)
4986 varray_type *delete_list;
4987 unsigned int *block_reg;
4988 sbitmap *nonnull_avin;
4989 sbitmap *nonnull_avout;
4990 struct null_pointer_info *npi;
4992 int bb;
4993 int current_block;
4994 sbitmap *nonnull_local = npi->nonnull_local;
4995 sbitmap *nonnull_killed = npi->nonnull_killed;
4997 /* Compute local properties, nonnull and killed. A register will have
4998 the nonnull property if at the end of the current block its value is
4999 known to be nonnull. The killed property indicates that somewhere in
5000 the block any information we had about the register is killed.
5002 Note that a register can have both properties in a single block. That
5003 indicates that it's killed, then later in the block a new value is
5004 computed. */
5005 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5006 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5008 for (current_block = 0; current_block < n_basic_blocks; current_block++)
5010 rtx insn, stop_insn;
5012 /* Set the current block for invalidate_nonnull_info. */
5013 npi->current_block = current_block;
5015 /* Scan each insn in the basic block looking for memory references and
5016 register sets. */
5017 stop_insn = NEXT_INSN (BLOCK_END (current_block));
5018 for (insn = BLOCK_HEAD (current_block);
5019 insn != stop_insn;
5020 insn = NEXT_INSN (insn))
5022 rtx set;
5023 rtx reg;
5025 /* Ignore anything that is not a normal insn. */
5026 if (! INSN_P (insn))
5027 continue;
5029 /* Basically ignore anything that is not a simple SET. We do have
5030 to make sure to invalidate nonnull_local and set nonnull_killed
5031 for such insns though. */
5032 set = single_set (insn);
5033 if (!set)
5035 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5036 continue;
5039 /* See if we've got a usable memory load. We handle it first
5040 in case it uses its address register as a dest (which kills
5041 the nonnull property). */
5042 if (GET_CODE (SET_SRC (set)) == MEM
5043 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5044 && REGNO (reg) >= npi->min_reg
5045 && REGNO (reg) < npi->max_reg)
5046 SET_BIT (nonnull_local[current_block],
5047 REGNO (reg) - npi->min_reg);
5049 /* Now invalidate stuff clobbered by this insn. */
5050 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5052 /* And handle stores; we do these last since any sets in INSN can
5053 not kill the nonnull property if it is derived from a MEM
5054 appearing in a SET_DEST. */
5055 if (GET_CODE (SET_DEST (set)) == MEM
5056 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5057 && REGNO (reg) >= npi->min_reg
5058 && REGNO (reg) < npi->max_reg)
5059 SET_BIT (nonnull_local[current_block],
5060 REGNO (reg) - npi->min_reg);
5064 /* Now compute global properties based on the local properties. This
5065 is a classic global availability algorithm. */
5066 compute_available (nonnull_local, nonnull_killed,
5067 nonnull_avout, nonnull_avin);
5069 /* Now look at each bb and see if it ends with a compare of a value
5070 against zero. */
5071 for (bb = 0; bb < n_basic_blocks; bb++)
5073 rtx last_insn = BLOCK_END (bb);
5074 rtx condition, earliest;
5075 int compare_and_branch;
5077 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5078 since BLOCK_REG[BB] is zero if this block did not end with a
5079 comparison against zero, this condition works. */
5080 if (block_reg[bb] < npi->min_reg
5081 || block_reg[bb] >= npi->max_reg)
5082 continue;
5084 /* LAST_INSN is a conditional jump. Get its condition. */
5085 condition = get_condition (last_insn, &earliest);
5087 /* If we can't determine the condition then skip. */
5088 if (! condition)
5089 continue;
5091 /* Is the register known to have a nonzero value? */
5092 if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5093 continue;
5095 /* Try to compute whether the compare/branch at the end of the block is one or
5096 two instructions. */
5097 if (earliest == last_insn)
5098 compare_and_branch = 1;
5099 else if (earliest == prev_nonnote_insn (last_insn))
5100 compare_and_branch = 2;
5101 else
5102 continue;
5104 /* We know the register in this comparison is nonnull at exit from
5105 this block. We can optimize this comparison. */
5106 if (GET_CODE (condition) == NE)
5108 rtx new_jump;
5110 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5111 last_insn);
5112 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5113 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5114 emit_barrier_after (new_jump);
5116 if (!*delete_list)
5117 VARRAY_RTX_INIT (*delete_list, 10, "delete_list");
5119 VARRAY_PUSH_RTX (*delete_list, last_insn);
5120 if (compare_and_branch == 2)
5121 VARRAY_PUSH_RTX (*delete_list, earliest);
5123 /* Don't check this block again. (Note that BLOCK_END is
5124 invalid here; we deleted the last instruction in the
5125 block.) */
5126 block_reg[bb] = 0;
5130 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5131 at compile time.
5133 This is conceptually similar to global constant/copy propagation and
5134 classic global CSE (it even uses the same dataflow equations as cprop).
5136 If a register is used as a memory address in the form (mem (reg)), then we
5137 know that REG can not be zero at that point in the program. Any instruction
5138 which sets REG "kills" this property.
5140 So, if every path leading to a conditional branch has an available memory
5141 reference of that form, then we know the register can not have the value
5142 zero at the conditional branch.
5144 So we merely need to compute the local properties and propagate that data
5145 around the cfg, then optimize where possible.
5147 We run this pass two times. Once before CSE, then again after CSE. This
5148 has proven to be the most profitable approach. It is rare for new
5149 optimization opportunities of this nature to appear after the first CSE
5150 pass.
5152 This could probably be integrated with global cprop with a little work. */
5154 void
5155 delete_null_pointer_checks (f)
5156 rtx f ATTRIBUTE_UNUSED;
5158 sbitmap *nonnull_avin, *nonnull_avout;
5159 unsigned int *block_reg;
5160 varray_type delete_list = NULL;
5161 int bb;
5162 int reg;
5163 int regs_per_pass;
5164 int max_reg;
5165 unsigned int i;
5166 struct null_pointer_info npi;
5168 /* If we have only a single block, then there's nothing to do. */
5169 if (n_basic_blocks <= 1)
5170 return;
5172 /* Trying to perform global optimizations on flow graphs which have
5173 a high connectivity will take a long time and is unlikely to be
5174 particularly useful.
5176 In normal circumstances a cfg should have about twice as many edges
5177 as blocks. But we do not want to punish small functions which have
5178 a couple switch statements. So we require a relatively large number
5179 of basic blocks and the ratio of edges to blocks to be high. */
5180 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5181 return;
5183 /* We need four bitmaps, each with a bit for each register in each
5184 basic block. */
5185 max_reg = max_reg_num ();
5186 regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5188 /* Allocate bitmaps to hold local and global properties. */
5189 npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5190 npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5191 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5192 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5194 /* Go through the basic blocks, seeing whether or not each block
5195 ends with a conditional branch whose condition is a comparison
5196 against zero. Record the register compared in BLOCK_REG. */
5197 block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5198 for (bb = 0; bb < n_basic_blocks; bb++)
5200 rtx last_insn = BLOCK_END (bb);
5201 rtx condition, earliest, reg;
5203 /* We only want conditional branches. */
5204 if (GET_CODE (last_insn) != JUMP_INSN
5205 || !any_condjump_p (last_insn)
5206 || !onlyjump_p (last_insn))
5207 continue;
5209 /* LAST_INSN is a conditional jump. Get its condition. */
5210 condition = get_condition (last_insn, &earliest);
5212 /* If we were unable to get the condition, or it is not an equality
5213 comparison against zero, then there's nothing we can do. */
5214 if (!condition
5215 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5216 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5217 || (XEXP (condition, 1)
5218 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5219 continue;
5221 /* We must be checking a register against zero. */
5222 reg = XEXP (condition, 0);
5223 if (GET_CODE (reg) != REG)
5224 continue;
5226 block_reg[bb] = REGNO (reg);
5229 /* Go through the algorithm for each block of registers. */
5230 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5232 npi.min_reg = reg;
5233 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5234 delete_null_pointer_checks_1 (&delete_list, block_reg, nonnull_avin,
5235 nonnull_avout, &npi);
5238 /* Now delete the instructions all at once. This breaks the CFG. */
5239 if (delete_list)
5241 for (i = 0; i < VARRAY_ACTIVE_SIZE (delete_list); i++)
5242 delete_insn (VARRAY_RTX (delete_list, i));
5243 VARRAY_FREE (delete_list);
5246 /* Free the table of registers compared at the end of every block. */
5247 free (block_reg);
5249 /* Free bitmaps. */
5250 free (npi.nonnull_local);
5251 free (npi.nonnull_killed);
5252 free (nonnull_avin);
5253 free (nonnull_avout);
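
/* A source-level picture of the transformation described above
   (hypothetical sketch): the load through P makes P provably nonnull,
   so a later equality test of P against zero folds away.  */

static int
null_check_elim_example (int *p)
{
  int t = *p;    /* (mem (reg P)) => P is nonnull from here on */

  if (p != 0)    /* the pass proves this test always true */
    t++;
  return t;
}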
5256 /* Code Hoisting variables and subroutines. */
5258 /* Very busy expressions. */
5259 static sbitmap *hoist_vbein;
5260 static sbitmap *hoist_vbeout;
5262 /* Hoistable expressions. */
5263 static sbitmap *hoist_exprs;
5265 /* Dominator bitmaps. */
5266 static sbitmap *dominators;
5268 /* ??? We could compute post dominators and run this algorithm in
5269 reverse to perform tail merging; doing so would probably be
5270 more effective than the tail merging code in jump.c.
5272 It's unclear if tail merging could be run in parallel with
5273 code hoisting. It would be nice. */
5275 /* Allocate vars used for code hoisting analysis. */
5277 static void
5278 alloc_code_hoist_mem (n_blocks, n_exprs)
5279 int n_blocks, n_exprs;
5281 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5282 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5283 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5285 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5286 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5287 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5288 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5290 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5293 /* Free vars used for code hoisting analysis. */
5295 static void
5296 free_code_hoist_mem ()
5298 free (antloc);
5299 free (transp);
5300 free (comp);
5302 free (hoist_vbein);
5303 free (hoist_vbeout);
5304 free (hoist_exprs);
5305 free (transpout);
5307 free (dominators);
5310 /* Compute the very busy expressions at entry/exit from each block.
5312 An expression is very busy if all paths from a given point
5313 compute the expression. */
5315 static void
5316 compute_code_hoist_vbeinout ()
5318 int bb, changed, passes;
5320 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5321 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5323 passes = 0;
5324 changed = 1;
5326 while (changed)
5328 changed = 0;
5330 /* We scan the blocks in the reverse order to speed up
5331 the convergence. */
5332 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5334 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5335 hoist_vbeout[bb], transp[bb]);
5336 if (bb != n_basic_blocks - 1)
5337 sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5340 passes++;
5343 if (gcse_file)
5344 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
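
/* One relaxation step of the equations above, for a single bitmap word
   (hypothetical sketch):

     vbein[bb]  = antloc[bb] | (vbeout[bb] & transp[bb])
     vbeout[bb] = AND over all successors S of vbein[S]

   iterated until no bit changes.  */

static unsigned long
vbein_word_example (unsigned long antloc_w, unsigned long vbeout_w,
                    unsigned long transp_w)
{
  return antloc_w | (vbeout_w & transp_w);
}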
5347 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5349 static void
5350 compute_code_hoist_data ()
5352 compute_local_properties (transp, comp, antloc, 0);
5353 compute_transpout ();
5354 compute_code_hoist_vbeinout ();
5355 calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
5356 if (gcse_file)
5357 fprintf (gcse_file, "\n");
5360 /* Determine if the expression identified by EXPR_INDEX would
5361 reach BB unimpaired if it was placed at the end of EXPR_BB.
5363 It's unclear exactly what Muchnick meant by "unimpaired". It seems
5364 to me that the expression must either be computed or transparent in
5365 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5366 would allow the expression to be hoisted out of loops, even if
5367 the expression wasn't a loop invariant.
5369 Contrast this to reachability for PRE where an expression is
5370 considered reachable if *any* path reaches instead of *all*
5371 paths. */
5373 static int
5374 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5375 int expr_bb;
5376 int expr_index;
5377 int bb;
5378 char *visited;
5380 edge pred;
5381 int visited_allocated_locally = 0;
5384 if (visited == NULL)
5386 visited_allocated_locally = 1;
5387 visited = xcalloc (n_basic_blocks, 1);
5390 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5392 int pred_bb = pred->src->index;
5394 if (pred->src == ENTRY_BLOCK_PTR)
5395 break;
5396 else if (visited[pred_bb])
5397 continue;
5399 /* Does this predecessor generate this expression? */
5400 else if (TEST_BIT (comp[pred_bb], expr_index))
5401 break;
5402 else if (! TEST_BIT (transp[pred_bb], expr_index))
5403 break;
5405 /* Not killed. */
5406 else
5408 visited[pred_bb] = 1;
5409 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5410 pred_bb, visited))
5411 break;
5414 if (visited_allocated_locally)
5415 free (visited);
5417 return (pred == NULL);
5420 /* Actually perform code hoisting. */
5422 static void
5423 hoist_code ()
5425 int bb, dominated;
5426 unsigned int i;
5427 struct expr **index_map;
5428 struct expr *expr;
5430 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5432 /* Compute a mapping from expression number (`bitmap_index') to
5433 hash table entry. */
5435 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5436 for (i = 0; i < expr_hash_table_size; i++)
5437 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5438 index_map[expr->bitmap_index] = expr;
5440 /* Walk over each basic block looking for potentially hoistable
5441 expressions; nothing gets hoisted from the entry block.
5442 for (bb = 0; bb < n_basic_blocks; bb++)
5444 int found = 0;
5445 int insn_inserted_p;
5447 /* Examine each expression that is very busy at the exit of this
5448 block. These are the potentially hoistable expressions. */
5449 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5451 int hoistable = 0;
5453 if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5455 /* We've found a potentially hoistable expression, now
5456 we look at every block BB dominates to see if it
5457 computes the expression. */
5458 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5460 /* Ignore self dominance. */
5461 if (bb == dominated
5462 || ! TEST_BIT (dominators[dominated], bb))
5463 continue;
5465 /* We've found a dominated block, now see if it computes
5466 the busy expression and whether or not moving that
5467 expression to the "beginning" of that block is safe. */
5468 if (!TEST_BIT (antloc[dominated], i))
5469 continue;
5471 /* Note if the expression would reach the dominated block
5472 unimpaired if it was placed at the end of BB.
5474 Keep track of how many times this expression is hoistable
5475 from a dominated block into BB. */
5476 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5477 hoistable++;
5480 /* If we found more than one hoistable occurrence of this
5481 expression, then note it in the bitmap of expressions to
5482 hoist. It makes no sense to hoist things which are computed
5483 in only one BB, and doing so tends to pessimize register
5484 allocation. One could increase this value to try harder
5485 to avoid any possible code expansion due to register
5486 allocation issues; however experiments have shown that
5487 the vast majority of hoistable expressions are only movable
5488 from two successors, so raising this threshold is likely
5489 to nullify any benefit we get from code hoisting. */
5490 if (hoistable > 1)
5492 SET_BIT (hoist_exprs[bb], i);
5493 found = 1;
5498 /* If we found nothing to hoist, then quit now. */
5499 if (! found)
5500 continue;
5502 /* Loop over all the hoistable expressions. */
5503 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5505 /* We want to insert the expression into BB only once, so
5506 note when we've inserted it. */
5507 insn_inserted_p = 0;
5509 /* These tests should be the same as the tests above. */
5510 if (TEST_BIT (hoist_vbeout[bb], i))
5512 /* We've found a potentially hoistable expression, now
5513 we look at every block BB dominates to see if it
5514 computes the expression. */
5515 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5517 /* Ignore self dominance. */
5518 if (bb == dominated
5519 || ! TEST_BIT (dominators[dominated], bb))
5520 continue;
5522 /* We've found a dominated block, now see if it computes
5523 the busy expression and whether or not moving that
5524 expression to the "beginning" of that block is safe. */
5525 if (!TEST_BIT (antloc[dominated], i))
5526 continue;
5528 /* The expression is computed in the dominated block and
5529 it would be safe to compute it at the start of the
5530 dominated block. Now we have to determine if the
5531 expression would reach the dominated block if it was
5532 placed at the end of BB. */
5533 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5535 struct expr *expr = index_map[i];
5536 struct occr *occr = expr->antic_occr;
5537 rtx insn;
5538 rtx set;
5540 /* Find the right occurrence of this expression. */
5541 while (occr && BLOCK_NUM (occr->insn) != dominated)
5542 occr = occr->next;
5544 /* Should never happen. */
5545 if (!occr)
5546 abort ();
5548 insn = occr->insn;
5550 set = single_set (insn);
5551 if (! set)
5552 abort ();
5554 /* Create a pseudo-reg to store the result of reaching
5555 expressions into. Get the mode for the new pseudo
5556 from the mode of the original destination pseudo. */
5557 if (expr->reaching_reg == NULL)
5558 expr->reaching_reg
5559 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5561 /* In theory this should never fail since we're creating
5562 a reg->reg copy.
5564 However, on the x86 some of the movXX patterns
5565 actually contain clobbers of scratch regs. This may
5566 cause the insn created by validate_change to not
5567 match any pattern and thus cause validate_change to
5568 fail. */
5569 if (validate_change (insn, &SET_SRC (set),
5570 expr->reaching_reg, 0))
5572 occr->deleted_p = 1;
5573 if (!insn_inserted_p)
5575 insert_insn_end_bb (index_map[i], bb, 0);
5576 insn_inserted_p = 1;
5585 free (index_map);
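
/* The overall effect of hoisting, at the source level (hypothetical
   sketch): an expression very busy at the exit of a dominator and
   computed in more than one dominated block is computed once at the
   end of the dominator instead.  */

static void
hoist_effect_example (int c, int a, int b, int *x, int *y)
{
  int t = a + b;   /* inserted once at the end of the dominator */

  if (c)
    *x = t;        /* was: *x = a + b */
  else
    *y = t;        /* was: *y = a + b */
}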
5588 /* Top level routine to perform one code hoisting (aka unification) pass.
5590 Return non-zero if a change was made. */
5592 static int
5593 one_code_hoisting_pass ()
5595 int changed = 0;
5597 alloc_expr_hash_table (max_cuid);
5598 compute_expr_hash_table ();
5599 if (gcse_file)
5600 dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
5601 expr_hash_table_size, n_exprs);
5603 if (n_exprs > 0)
5605 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5606 compute_code_hoist_data ();
5607 hoist_code ();
5608 free_code_hoist_mem ();
5611 free_expr_hash_table ();
5613 return changed;