/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - dead store elimination
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass
*/
/* References searched while implementing this.

   Compilers: Principles, Techniques, and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The Power of Assignment Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global Code Motion / Global Value Numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/
#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "params.h"

#include "obstack.h"
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free
/* Maximum number of passes to perform.  */
#define MAX_PASSES 1

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1
/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).
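
   For example [an illustrative sketch, not taken from this file]: if PRE
   rewrites a redundant computation

       (set (reg 200) (plus (reg 100) (reg 101)))

   into the copy (set (reg 200) (reg 300)), where 300 is the new pseudo
   holding the expression's value, the second cprop pass can propagate
   reg 300 into the uses of reg 200, often leaving the copy dead.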
   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  Macro MAX_PASSES
   can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.
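
   An illustrative example (hypothetical, not from a real dump): if "a + b"
   is computed on only one of two paths into a block that computes "a + b"
   again, the second computation is partially redundant.  Step 3 rewrites it
   as a copy from a new pseudo T, step 4 inserts T = a + b on the path that
   lacked the computation, and step 5 inserts a copy to T after the
   occurrence that already reaches it.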
   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   Much of the code's simplicity comes from using small functions for simple
   tasks, even when a function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle things can
   be rearranged.

   Help stamp out big monolithic functions!  */
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;
struct reg_use { rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};
/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static unsigned int set_hash_table_size;

/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;
/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#ifdef ENABLE_CHECKING
#define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
#else
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
#endif

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])
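
/* Because cuids increase monotonically and only real insns get them, two
   insns in the same function can be ordered simply by comparing cuids,
   e.g. INSN_CUID (i1) < INSN_CUID (i2); the first/last-set bookkeeping
   below relies on exactly this.  */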
/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;

/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.

   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
   advantage of recording things this way is that `reg_set_table' is fairly
   sparse with respect to pseudo regs but for hard regs could be fairly dense
   [relatively speaking].  And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */
typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;
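
/* So, to visit every place pseudo REGNO is set, one walks its chain
   (an illustrative sketch, not a function from this file):

       struct reg_set *r;
       for (r = reg_set_table[regno]; r != NULL; r = r->next)
         ... examine r->insn, e.g. BLOCK_NUM (r->insn) ...

   iterating over the sets of the pseudo rather than over all basic blocks,
   as described above.  */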
static reg_set **reg_set_table;

/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* For each block, non-zero if memory is set in that block.
   This is computed during hash table computation and is used by
   expr_killed_p and compute_transp.
   ??? Handling of memory is very simple, we don't make any attempt
   to optimize things (later).
   ??? This can be computed by compute_sets since the information
   doesn't change.  */
static char *mem_set_in_block;
/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;
/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.
       rd_kill[block_num][cuid_num]
       ae_kill[block_num][expr_num]  */

/* For reaching defs.  */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* For available exprs.  */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info
{
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  unsigned int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  unsigned int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
static void compute_can_copy PARAMS ((void));
static char *gmalloc PARAMS ((unsigned int));
static char *grealloc PARAMS ((char *, unsigned int));
static char *gcse_alloc PARAMS ((unsigned long));
static void alloc_gcse_mem PARAMS ((rtx));
static void free_gcse_mem PARAMS ((void));
static void alloc_reg_set_mem PARAMS ((int));
static void free_reg_set_mem PARAMS ((void));
static int get_bitmap_width PARAMS ((int, int, int));
static void record_one_set PARAMS ((int, rtx));
static void record_set_info PARAMS ((rtx, rtx, void *));
static void compute_sets PARAMS ((rtx));
static void hash_scan_insn PARAMS ((rtx, int, int));
static void hash_scan_set PARAMS ((rtx, rtx, int));
static void hash_scan_clobber PARAMS ((rtx, rtx));
static void hash_scan_call PARAMS ((rtx, rtx));
static int want_to_gcse_p PARAMS ((rtx));
static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p PARAMS ((rtx, rtx));
static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
                                          int, int));
static void insert_set_in_table PARAMS ((rtx, rtx));
static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_string_1 PARAMS ((const char *));
static unsigned int hash_set PARAMS ((int, int));
static int expr_equiv_p PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((unsigned int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
                                     int, int));
static struct expr *lookup_expr PARAMS ((rtx));
static struct expr *lookup_set PARAMS ((unsigned int, rtx));
static struct expr *next_set PARAMS ((unsigned int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p PARAMS ((rtx, rtx));
static void mark_call PARAMS ((rtx));
static void mark_set PARAMS ((rtx, rtx));
static void mark_clobber PARAMS ((rtx, rtx));
static void mark_oprs_set PARAMS ((rtx));
static void alloc_cprop_mem PARAMS ((int, int));
static void free_cprop_mem PARAMS ((void));
static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
                                              int));
static void compute_cprop_data PARAMS ((void));
static void find_used_regs PARAMS ((rtx));
static int try_replace_reg PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump PARAMS ((rtx, rtx, struct reg_use *, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((rtx, struct reg_use *, rtx));
#endif
static int cprop_insn PARAMS ((rtx, int));
static int cprop PARAMS ((int));
static int one_cprop_pass PARAMS ((int, int));
static void alloc_pre_mem PARAMS ((int, int));
static void free_pre_mem PARAMS ((void));
static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
static void insert_insn_end_bb PARAMS ((struct expr *, int, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies PARAMS ((void));
static int pre_delete PARAMS ((void));
static int pre_gcse PARAMS ((void));
static int one_pre_gcse_pass PARAMS ((int));
static void add_label_notes PARAMS ((rtx, rtx));
static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem PARAMS ((void));
static void compute_code_hoist_vbeinout PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
static void hoist_code PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));
static void alloc_rd_mem PARAMS ((int, int));
static void free_rd_mem PARAMS ((void));
static void handle_rd_kill_set PARAMS ((rtx, int, int));
static void compute_kill_rd PARAMS ((void));
static void compute_rd PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem PARAMS ((void));
static void compute_ae_gen PARAMS ((void));
static int expr_killed_p PARAMS ((rtx, int));
static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
                                        int, int));
static rtx computing_insn PARAMS ((struct expr *, rtx));
static int def_reaches_here_p PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr PARAMS ((rtx, struct expr *));
static int classic_gcse PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((varray_type *, unsigned int *,
                                                  sbitmap *, sbitmap *,
                                                  struct null_pointer_info *));
static rtx process_insert_insn PARAMS ((struct expr *));
static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
                                             int, int, char *));
static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
                                                 int, char *));
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (file)
    dump_flow_info (file);

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return 0;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      if (warn_disabled_optimization)
        warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
                 n_basic_blocks, n_edges / n_basic_blocks);
      return 0;
    }

  /* If allocating memory for the cprop bitmap would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_gcse_regno)
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      if (warn_disabled_optimization)
        warning ("GCSE disabled: %d basic blocks and %d registers",
                 n_basic_blocks, max_gcse_regno);

      return 0;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_PASSES)
    {
      changed = 0;
      if (file)
        fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
         and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
         during this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
        changed |= one_classic_gcse_pass (pass + 1);
      else
        {
          changed |= one_pre_gcse_pass (pass + 1);
          free_reg_set_mem ();
          alloc_reg_set_mem (max_reg_num ());
          compute_sets (f);
          run_jump_opt_after_gcse = 1;
        }

      if (max_pass_bytes < bytes_used)
        max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
         not re-use the existing allocated memory because the tables
         will not have info for the insns or registers created by
         partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
         for code size -- it rarely makes programs faster, and can make
         them bigger if we did partial redundancy elimination (when optimizing
         for space, we use a classic gcse algorithm instead of partial
         redundancy algorithms).  */
      if (optimize_size)
        {
          max_gcse_regno = max_reg_num ();
          alloc_gcse_mem (f);
          changed |= one_code_hoisting_pass ();
          free_gcse_mem ();

          if (max_pass_bytes < bytes_used)
            max_pass_bytes = bytes_used;
        }

      if (file)
        {
          fprintf (file, "\n");
          fflush (file);
        }

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);
  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
               current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
               pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  obstack_free (&gcse_obstack, NULL_PTR);
  free_reg_set_mem ();
  return run_jump_opt_after_gcse;
}
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy_p, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
        can_copy_p[i] = 0;
#else
        reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
        insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
        if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
          can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();
}
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}
/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  memset ((char *) uid_cuid, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
        uid_cuid[INSN_UID (insn)] = i++;
      else
        uid_cuid[INSN_UID (insn)] = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  memset ((char *) cuid_insn, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      CUID_INSN (i++) = insn;

  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
                                                       max_gcse_regno);
  mem_set_in_block = (char *) gmalloc (n_basic_blocks);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  free (reg_set_in_block);
  free (mem_set_in_block);
}
/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values across the
   various blocks are combined (by following flow graph edges) to arrive at
   global values.  Conceptually, each set of equations is independent.  We
   may therefore solve all the equations in parallel, solve them one at a
   time, or pick any intermediate approach.

   When you're going to need N two-dimensional bitmaps, each X (say, the
   number of blocks) by Y (say, the number of expressions), call this
   function.  It's not important what X and Y represent; only that Y
   correspond to the things that can be done in parallel.  This function will
   return an appropriate chunking factor C; you should solve C sets of
   equations in parallel.  By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
                             / column_size);
}
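
/* A sketch of the intended use (hypothetical caller, not code from this
   file): to solve four equation systems over n_basic_blocks blocks and
   n_exprs expressions within the memory budget, one would do

       int width = get_bitmap_width (4, n_basic_blocks, n_exprs);
       for (first = 0; first < n_exprs; first += width)
         ... allocate n_basic_blocks x width bitmaps and solve the
             equations for expressions [first, first + width) only ...

   trading extra solver passes for fewer simultaneously live bitmaps.  */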
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   SETP controls which hash table to look at.  If zero, this routine looks at
   the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */

static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
        sbitmap_vector_zero (transp, n_basic_blocks);
      else
        sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
        {
          int indx = expr->bitmap_index;
          struct occr *occr;

          /* The expression is transparent in this block if it is not killed.
             We start by assuming all are transparent [none are killed], and
             then reset the bits for those that are.  */
          if (transp)
            compute_transp (expr->expr, indx, transp, setp);

          /* The occurrences recorded in antic_occr are exactly those that
             we want to set to non-zero in ANTLOC.  */
          if (antloc)
            for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->deleted_p = 0;
              }

          /* The occurrences recorded in avail_occr are exactly those that
             we want to set to non-zero in COMP.  */
          if (comp)
            for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->copied_p = 0;
              }

          /* While we're scanning the table, this is a good place to
             initialize this.  */
          expr->reaching_reg = 0;
        }
    }
}
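
/* For instance, sketching the callers described above: PRE and code
   hoisting call compute_local_properties (transp, comp, antloc, 0) to scan
   the expr hash table, while cprop passes SETP == 1 to scan the set hash
   table; in that case the vector passed as TRANSP starts out all-zero and
   accumulates the blocks that alter the set, i.e. cprop's ABSALTERED.  */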
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  unsigned int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  memset ((char *) reg_set_table, 0, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL_PTR);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;

      reg_set_table
        = (struct reg_set **) grealloc ((char *) reg_set_table,
                                        new_size * sizeof (struct reg_set *));
      memset ((char *) (reg_set_table + reg_set_table_size), 0,
              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
                                                   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = reg_set_table[regno];
  reg_set_table[regno] = new_reg_info;
}
/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.  See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

  for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      note_stores (PATTERN (insn), record_set_info, insn);
}
/* Hash table support.  */

/* For each register, the cuid of the first/last insn in the block to set it,
   or -1 if not set.  */
#define NEVER_SET -1
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or -1 if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.

   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;
/* Perform a quick check whether X, the source of a set, is something
   we want to consider for GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  return 1;
}
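
/* As illustrative examples, (set (reg 100) (plus (reg 101) (reg 102)))
   has a source worth entering in the expression hash table, while
   (set (reg 100) (reg 101)) and (set (reg 100) (const_int 4)) are
   rejected above -- plain copies and constant loads are left to
   copy/constant propagation instead.  */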
/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
        return (reg_last_set[REGNO (x)] == NEVER_SET
                || reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
        return (reg_first_set[REGNO (x)] == NEVER_SET
                || reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (avail_p && mem_last_set != NEVER_SET
          && mem_last_set >= INSN_CUID (insn))
        return 0;
      else if (! avail_p && mem_first_set != NEVER_SET
               && mem_first_set < INSN_CUID (insn))
        return 0;
      else
        return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call needed at this
             level, change it into iteration.  This function is called enough
             to be worth it.  */
          if (i == 0)
            return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

          else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
            return 0;
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
            return 0;
    }

  return 1;
}
/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
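
/* These two wrappers correspond to the two local properties: for an insn I
   computing expression X in block B, oprs_anticipatable_p (X, I) asks
   whether X's operands survive from the start of B down to I (could X move
   to the block's beginning?), while oprs_available_p (X, I) asks whether
   they survive from I to the end of B (could X move to the block's end?).  */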
/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}

/* Hash a string.  Just add its bytes up.  */

static inline unsigned
hash_string_1 (ps)
     const char *ps;
{
  unsigned hash = 0;
  const unsigned char *p = (const unsigned char *) ps;

  if (p)
    while (*p)
      hash += *p++;

  return hash;
}
/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;

  /* Used to turn recursion into iteration.  We can't rely on GCC's
     tail-recursion elimination since we need to keep accumulating values
     in HASH.  */

  if (x == 0)
    return hash;

 repeat:
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      hash += ((unsigned int) REG << 7) + REGNO (x);
      return hash;

    case CONST_INT:
      hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
               + (unsigned int) INTVAL (x));
      return hash;

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
         the integers representing the constant.  */
      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
        for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
          hash += (unsigned int) XWINT (x, i);
      else
        hash += ((unsigned int) CONST_DOUBLE_LOW (x)
                 + (unsigned int) CONST_DOUBLE_HIGH (x));
      return hash;

      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
         differences and differences between each stage's debugging dumps.  */
      hash += (((unsigned int) LABEL_REF << 7)
               + CODE_LABEL_NUMBER (XEXP (x, 0)));
      return hash;

    case SYMBOL_REF:
      {
        /* Don't hash on the symbol's address to avoid bootstrap differences.
           Different hash values may cause expressions to be recorded in
           different orders and thus different registers to be used in the
           final assembler.  This also avoids differences in the dump files
           between various stages.  */
        unsigned int h = 0;
        const unsigned char *p = (const unsigned char *) XSTR (x, 0);

        while (*p)
          h += (h << 7) + *p++; /* ??? revisit */

        hash += ((unsigned int) SYMBOL_REF << 7) + h;
        return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }

      hash += (unsigned int) MEM;
      hash += MEM_ALIAS_SET (x);
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }
      else
        {
          /* We don't want to take the filename and line into account.  */
          hash += (unsigned) code + (unsigned) GET_MODE (x)
            + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
            + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
            + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);

          if (ASM_OPERANDS_INPUT_LENGTH (x))
            {
              for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
                {
                  hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
                                        GET_MODE (ASM_OPERANDS_INPUT (x, i)),
                                        do_not_record_p)
                           + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
                                            (x, i)));
                }

              hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
              x = ASM_OPERANDS_INPUT (x, 0);
              mode = GET_MODE (x);
              goto repeat;
            }
          return hash;
        }

    default:
      break;
    }

  hash += (unsigned) code + (unsigned) GET_MODE (x);
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = XEXP (x, i);
              goto repeat;
            }

          hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
          if (*do_not_record_p)
            return 0;
        }

      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          {
            hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
            if (*do_not_record_p)
              return 0;
          }

      else if (fmt[i] == 's')
        hash += hash_string_1 (XSTR (x, i));
      else if (fmt[i] == 'i')
        hash += (unsigned int) XINT (x, i);
      else
        abort ();
    }

  return hash;
}
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This simplifies the PRE copy
   propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}
/* Return non-zero if exp1 is equivalent to exp2.
   ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */

static int
expr_equiv_p (x, y)
     rtx x, y;
{
  register int i, j;
  register enum rtx_code code;
  register const char *fmt;

  if (x == y)
    return 1;

  if (x == 0 || y == 0)
    return x == y;

  code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

    case MEM:
      /* Can't merge two expressions in different alias sets, since we can
         decide that the expression is transparent in a block when it isn't,
         due to it being set with the different alias set.  */
      if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
        return 0;
      break;

      /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
      return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
               && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
              || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
                  && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));

    case ASM_OPERANDS:
      /* We don't use the generic code below because we want to
         disregard filename and line numbers.  */

      /* A volatile asm isn't equivalent to any other.  */
      if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
        return 0;

      if (GET_MODE (x) != GET_MODE (y)
          || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
          || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
                     ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
          || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
          || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
        return 0;

      if (ASM_OPERANDS_INPUT_LENGTH (x))
        {
          for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
            if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
                                ASM_OPERANDS_INPUT (y, i))
                || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
                           ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
              return 0;
        }

      return 1;

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole thing.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
        {
        case 'e':
          if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
            return 0;
          break;

        case 'E':
          if (XVECLEN (x, i) != XVECLEN (y, i))
            return 0;
          for (j = 0; j < XVECLEN (x, i); j++)
            if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
              return 0;
          break;

        case 's':
          if (strcmp (XSTR (x, i), XSTR (y, i)))
            return 0;
          break;

        case 'i':
          if (XINT (x, i) != XINT (y, i))
            return 0;
          break;

        case 'w':
          if (XWINT (x, i) != XWINT (y, i))
            return 0;
          break;

        case '0':
          break;

        default:
          abort ();
        }
    }

  return 1;
}
/* Insert expression X in INSN in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is non-zero if X is an anticipatable expression.
   AVAIL_P is non-zero if X is an available expression.  */

static void
insert_expr_in_table (x, mode, insn, antic_p, avail_p)
     rtx x;
     enum machine_mode mode;
     rtx insn;
     int antic_p, avail_p;
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;
  struct occr *last_occr = NULL;

  hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = expr_hash_table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
         the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (expr_hash_table[hash] == NULL)
        /* This is the first pattern that hashed to this index.  */
        expr_hash_table[hash] = cur_expr;
      else
        /* Add EXPR to end of this hash chain.  */
        last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = n_exprs++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* Search for another occurrence in the same basic block.  */
      while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = antic_occr;
          antic_occr = antic_occr->next;
        }

      if (antic_occr)
        /* Found another instance of the expression in the same basic block.
           Prefer the currently recorded one.  We want the first one in the
           block and the block is scanned from start to end.  */
        ; /* nothing to do */
      else
        {
          /* First occurrence of this expression in this basic block.  */
          antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);
          /* First occurrence of this expression in any block?  */
          if (cur_expr->antic_occr == NULL)
            cur_expr->antic_occr = antic_occr;
          else
            last_occr->next = antic_occr;

          antic_occr->insn = insn;
          antic_occr->next = NULL;
        }
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      /* Search for another occurrence in the same basic block.  */
      while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = avail_occr;
          avail_occr = avail_occr->next;
        }

      if (avail_occr)
        /* Found another instance of the expression in the same basic block.
           Prefer this occurrence to the currently recorded one.  We want
           the last one in the block and the block is scanned from start
           to end.  */
        avail_occr->insn = insn;
      else
        {
          /* First occurrence of this expression in this basic block.  */
          avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);

          /* First occurrence of this expression in any block?  */
          if (cur_expr->avail_occr == NULL)
            cur_expr->avail_occr = avail_occr;
          else
            last_occr->next = avail_occr;

          avail_occr->insn = insn;
          avail_occr->next = NULL;
        }
    }
}
1828 /* Insert pattern X in INSN in the hash table.
1829 X is a SET of a reg to either another reg or a constant.
1830 If it is already present, record it as the last occurrence in INSN's
1831 basic block. */
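/* For example (illustrative), both

       (set (reg 70) (reg 71))
       (set (reg 70) (const_int 42))

   are valid arguments here; anything whose destination is not a REG is
   rejected by the abort below.  */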
1833 static void
1834 insert_set_in_table (x, insn)
1835 rtx x;
1836 rtx insn;
1838 int found;
1839 unsigned int hash;
1840 struct expr *cur_expr, *last_expr = NULL;
1841 struct occr *cur_occr, *last_occr = NULL;
1843 if (GET_CODE (x) != SET
1844 || GET_CODE (SET_DEST (x)) != REG)
1845 abort ();
1847 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1849 cur_expr = set_hash_table[hash];
1850 found = 0;
1852 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1854 /* If the expression isn't found, save a pointer to the end of
1855 the list. */
1856 last_expr = cur_expr;
1857 cur_expr = cur_expr->next_same_hash;
1860 if (! found)
1862 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1863 bytes_used += sizeof (struct expr);
1864 if (set_hash_table[hash] == NULL)
1865 /* This is the first pattern that hashed to this index. */
1866 set_hash_table[hash] = cur_expr;
1867 else
1868 /* Add EXPR to end of this hash chain. */
1869 last_expr->next_same_hash = cur_expr;
1871 /* Set the fields of the expr element.
1872 We must copy X because it can be modified when copy propagation is
1873 performed on its operands. */
1874 /* ??? Should this go in a different obstack? */
1875 cur_expr->expr = copy_rtx (x);
1876 cur_expr->bitmap_index = n_sets++;
1877 cur_expr->next_same_hash = NULL;
1878 cur_expr->antic_occr = NULL;
1879 cur_expr->avail_occr = NULL;
1882 /* Now record the occurrence. */
1883 cur_occr = cur_expr->avail_occr;
1885 /* Search for another occurrence in the same basic block. */
1886 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1888 /* If an occurrence isn't found, save a pointer to the end of
1889 the list. */
1890 last_occr = cur_occr;
1891 cur_occr = cur_occr->next;
1894 if (cur_occr)
1895 /* Found another instance of the expression in the same basic block.
1896 Prefer this occurrence to the currently recorded one. We want the
1897 last one in the block and the block is scanned from start to end. */
1898 cur_occr->insn = insn;
1899 else
1901 /* First occurrence of this expression in this basic block. */
1902 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1903 bytes_used += sizeof (struct occr);
1905 /* First occurrence of this expression in any block? */
1906 if (cur_expr->avail_occr == NULL)
1907 cur_expr->avail_occr = cur_occr;
1908 else
1909 last_occr->next = cur_occr;
1911 cur_occr->insn = insn;
1912 cur_occr->next = NULL;
1916 /* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
1917 non-zero, this is for the assignment hash table, otherwise it is for the
1918 expression hash table. */
1920 static void
1921 hash_scan_set (pat, insn, set_p)
1922 rtx pat, insn;
1923 int set_p;
1925 rtx src = SET_SRC (pat);
1926 rtx dest = SET_DEST (pat);
1928 if (GET_CODE (src) == CALL)
1929 hash_scan_call (src, insn);
1931 if (GET_CODE (dest) == REG)
1933 int regno = REGNO (dest);
1934 rtx tmp, note;
1936 /* Only record sets of pseudo-regs in the hash table. */
1937 if (! set_p
1938 && regno >= FIRST_PSEUDO_REGISTER
1939 /* Don't GCSE something if we can't do a reg/reg copy. */
1940 && can_copy_p [GET_MODE (dest)]
1941 /* Is SET_SRC something we want to gcse? */
1942 && want_to_gcse_p (src)
1943 /* Don't GCSE if it has attached REG_EQUIV note.
1944 At this point only function parameters should have
1945 REG_EQUIV notes, and if the argument slot is used somewhere
1946 explicitly, it means the address of the parameter has been taken,
1947 so we should not extend the lifetime of the pseudo. */
1948 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1949 || GET_CODE (XEXP (note, 0)) != MEM))
1951 /* An expression is not anticipatable if its operands are
1952 modified before this insn. */
1953 int antic_p = oprs_anticipatable_p (src, insn);
1954 /* An expression is not available if its operands are
1955 subsequently modified, including this insn. */
1956 int avail_p = oprs_available_p (src, insn);
1958 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1961 /* Record sets for constant/copy propagation. */
1962 else if (set_p
1963 && regno >= FIRST_PSEUDO_REGISTER
1964 && ((GET_CODE (src) == REG
1965 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1966 && can_copy_p [GET_MODE (dest)])
1967 || GET_CODE (src) == CONST_INT
1968 || GET_CODE (src) == SYMBOL_REF
1969 || GET_CODE (src) == CONST_DOUBLE)
1970 /* A copy is not available if its src or dest is subsequently
1971 modified. Here we want to search from INSN+1 on, but
1972 oprs_available_p searches from INSN on. */
1973 && (insn == BLOCK_END (BLOCK_NUM (insn))
1974 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1975 && oprs_available_p (pat, tmp))))
1976 insert_set_in_table (pat, insn);
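/* A sketch of why the availability check above must start at the insn
   after INSN: for (set (reg 70) (reg 71)), INSN itself modifies
   (reg 70), so calling oprs_available_p on INSN would always fail,
   even though the copy remains usable as long as neither register is
   changed later in the block.  */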
1980 static void
1981 hash_scan_clobber (x, insn)
1982 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1984 /* Currently nothing to do. */
1987 static void
1988 hash_scan_call (x, insn)
1989 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1991 /* Currently nothing to do. */
1994 /* Process INSN and add hash table entries as appropriate.
1996 Only available expressions that set a single pseudo-reg are recorded.
1998 Single sets in a PARALLEL could be handled, but it's an extra complication
1999 that isn't dealt with right now. The trick is handling the CLOBBERs that
2000 are also in the PARALLEL. Later.
2002 If SET_P is non-zero, this is for the assignment hash table,
2003 otherwise it is for the expression hash table.
2004 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should
2005 not record any expressions. */
2007 static void
2008 hash_scan_insn (insn, set_p, in_libcall_block)
2009 rtx insn;
2010 int set_p;
2011 int in_libcall_block;
2013 rtx pat = PATTERN (insn);
2014 int i;
2016 /* Pick out the sets of INSN and for other forms of instructions record
2017 what's been modified. */
2019 if (GET_CODE (pat) == SET && ! in_libcall_block)
2021 /* Ignore obvious no-ops. */
2022 if (SET_SRC (pat) != SET_DEST (pat))
2023 hash_scan_set (pat, insn, set_p);
2025 else if (GET_CODE (pat) == PARALLEL)
2026 for (i = 0; i < XVECLEN (pat, 0); i++)
2028 rtx x = XVECEXP (pat, 0, i);
2030 if (GET_CODE (x) == SET)
2032 if (GET_CODE (SET_SRC (x)) == CALL)
2033 hash_scan_call (SET_SRC (x), insn);
2035 else if (GET_CODE (x) == CLOBBER)
2036 hash_scan_clobber (x, insn);
2037 else if (GET_CODE (x) == CALL)
2038 hash_scan_call (x, insn);
2041 else if (GET_CODE (pat) == CLOBBER)
2042 hash_scan_clobber (pat, insn);
2043 else if (GET_CODE (pat) == CALL)
2044 hash_scan_call (pat, insn);
2047 static void
2048 dump_hash_table (file, name, table, table_size, total_size)
2049 FILE *file;
2050 const char *name;
2051 struct expr **table;
2052 int table_size, total_size;
2054 int i;
2055 /* Flattened out table, so it's printed in proper order. */
2056 struct expr **flat_table;
2057 unsigned int *hash_val;
2058 struct expr *expr;
2060 flat_table
2061 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2062 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2064 for (i = 0; i < table_size; i++)
2065 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2067 flat_table[expr->bitmap_index] = expr;
2068 hash_val[expr->bitmap_index] = i;
2071 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2072 name, table_size, total_size);
2074 for (i = 0; i < total_size; i++)
2075 if (flat_table[i] != 0)
2077 expr = flat_table[i];
2078 fprintf (file, "Index %d (hash value %d)\n ",
2079 expr->bitmap_index, hash_val[i]);
2080 print_rtl (file, expr->expr);
2081 fprintf (file, "\n");
2084 fprintf (file, "\n");
2086 free (flat_table);
2087 free (hash_val);
2090 /* Record register first/last/block set information for REGNO in INSN.
2092 reg_first_set records the first place in the block where the register
2093 is set and is used to compute "anticipatability".
2095 reg_last_set records the last place in the block where the register
2096 is set and is used to compute "availability".
2098 reg_set_in_block records whether the register is set in the block
2099 and is used to compute "transparency". */
2101 static void
2102 record_last_reg_set_info (insn, regno)
2103 rtx insn;
2104 int regno;
2106 if (reg_first_set[regno] == NEVER_SET)
2107 reg_first_set[regno] = INSN_CUID (insn);
2109 reg_last_set[regno] = INSN_CUID (insn);
2110 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2113 /* Record memory first/last/block set information for INSN. */
2115 static void
2116 record_last_mem_set_info (insn)
2117 rtx insn;
2119 if (mem_first_set == NEVER_SET)
2120 mem_first_set = INSN_CUID (insn);
2122 mem_last_set = INSN_CUID (insn);
2123 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2126 /* Called from compute_hash_table via note_stores to handle one
2127 SET or CLOBBER in an insn. DATA is really the instruction in which
2128 the SET is taking place. */
2130 static void
2131 record_last_set_info (dest, setter, data)
2132 rtx dest, setter ATTRIBUTE_UNUSED;
2133 void *data;
2135 rtx last_set_insn = (rtx) data;
2137 if (GET_CODE (dest) == SUBREG)
2138 dest = SUBREG_REG (dest);
2140 if (GET_CODE (dest) == REG)
2141 record_last_reg_set_info (last_set_insn, REGNO (dest));
2142 else if (GET_CODE (dest) == MEM
2143 /* Ignore pushes, they clobber nothing. */
2144 && ! push_operand (dest, GET_MODE (dest)))
2145 record_last_mem_set_info (last_set_insn);
2148 /* Top level function to create an expression or assignment hash table.
2150 Expression entries are placed in the hash table if
2151 - they are of the form (set (pseudo-reg) src),
2152 - src is something we want to perform GCSE on,
2153 - none of the operands are subsequently modified in the block
2155 Assignment entries are placed in the hash table if
2156 - they are of the form (set (pseudo-reg) src),
2157 - src is something we want to perform const/copy propagation on,
2158 - none of the operands or target are subsequently modified in the block
2160 Currently src must be a pseudo-reg or a const_int.
2163 SET_P is non-zero for computing the assignment hash table. */
2165 static void
2166 compute_hash_table (set_p)
2167 int set_p;
2169 int bb;
2171 /* While we compute the hash table we also compute a bit array of which
2172 registers are set in which blocks.
2173 We also compute which blocks set memory, in the absence of aliasing
2174 support [which is TODO].
2175 ??? This isn't needed during const/copy propagation, but it's cheap to
2176 compute. Later. */
2177 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2178 memset ((char *) mem_set_in_block, 0, n_basic_blocks);
2180 /* Some working arrays used to track first and last set in each block. */
2181 /* ??? One could use alloca here, but at some size a threshold is crossed
2182 beyond which one should use malloc. Are we at that threshold here? */
2183 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2184 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2186 for (bb = 0; bb < n_basic_blocks; bb++)
2188 rtx insn;
2189 unsigned int regno;
2190 int in_libcall_block;
2191 unsigned int i;
2193 /* First pass over the instructions records information used to
2194 determine when registers and memory are first and last set.
2195 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2196 could be moved to compute_sets since they currently don't change. */
2198 for (i = 0; i < max_gcse_regno; i++)
2199 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2201 mem_first_set = NEVER_SET;
2202 mem_last_set = NEVER_SET;
2204 for (insn = BLOCK_HEAD (bb);
2205 insn && insn != NEXT_INSN (BLOCK_END (bb));
2206 insn = NEXT_INSN (insn))
2208 #ifdef NON_SAVING_SETJMP
2209 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2210 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2212 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2213 record_last_reg_set_info (insn, regno);
2214 continue;
2216 #endif
2218 if (! INSN_P (insn))
2219 continue;
2221 if (GET_CODE (insn) == CALL_INSN)
2223 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2224 if ((call_used_regs[regno]
2225 && regno != STACK_POINTER_REGNUM
2226 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2227 && regno != HARD_FRAME_POINTER_REGNUM
2228 #endif
2229 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2230 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2231 #endif
2232 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2233 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2234 #endif
2236 && regno != FRAME_POINTER_REGNUM)
2237 || global_regs[regno])
2238 record_last_reg_set_info (insn, regno);
2240 if (! CONST_CALL_P (insn))
2241 record_last_mem_set_info (insn);
2244 note_stores (PATTERN (insn), record_last_set_info, insn);
2247 /* The next pass builds the hash table. */
2249 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2250 insn && insn != NEXT_INSN (BLOCK_END (bb));
2251 insn = NEXT_INSN (insn))
2252 if (INSN_P (insn))
2254 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2255 in_libcall_block = 1;
2256 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2257 in_libcall_block = 0;
2258 hash_scan_insn (insn, set_p, in_libcall_block);
2262 free (reg_first_set);
2263 free (reg_last_set);
2265 /* Catch bugs early. */
2266 reg_first_set = reg_last_set = 0;
2269 /* Allocate space for the set hash table.
2270 N_INSNS is the number of instructions in the function.
2271 It is used to determine the number of buckets to use. */
2273 static void
2274 alloc_set_hash_table (n_insns)
2275 int n_insns;
2277 int n;
2279 set_hash_table_size = n_insns / 4;
2280 if (set_hash_table_size < 11)
2281 set_hash_table_size = 11;
2283 /* Attempt to maintain efficient use of hash table.
2284 Making it an odd number is simplest for now.
2285 ??? Later take some measurements. */
2286 set_hash_table_size |= 1;
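/* E.g. (illustrative) a 1000-insn function gets 1000 / 4 = 250,
   forced odd to 251 buckets.  */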
2287 n = set_hash_table_size * sizeof (struct expr *);
2288 set_hash_table = (struct expr **) gmalloc (n);
2291 /* Free things allocated by alloc_set_hash_table. */
2293 static void
2294 free_set_hash_table ()
2296 free (set_hash_table);
2299 /* Compute the hash table for doing copy/const propagation. */
2301 static void
2302 compute_set_hash_table ()
2304 /* Initialize count of number of entries in hash table. */
2305 n_sets = 0;
2306 memset ((char *) set_hash_table, 0,
2307 set_hash_table_size * sizeof (struct expr *));
2309 compute_hash_table (1);
2312 /* Allocate space for the expression hash table.
2313 N_INSNS is the number of instructions in the function.
2314 It is used to determine the number of buckets to use. */
2316 static void
2317 alloc_expr_hash_table (n_insns)
2318 unsigned int n_insns;
2320 int n;
2322 expr_hash_table_size = n_insns / 2;
2323 /* Make sure the amount is usable. */
2324 if (expr_hash_table_size < 11)
2325 expr_hash_table_size = 11;
2327 /* Attempt to maintain efficient use of hash table.
2328 Making it an odd number is simplest for now.
2329 ??? Later take some measurements. */
2330 expr_hash_table_size |= 1;
2331 n = expr_hash_table_size * sizeof (struct expr *);
2332 expr_hash_table = (struct expr **) gmalloc (n);
2335 /* Free things allocated by alloc_expr_hash_table. */
2337 static void
2338 free_expr_hash_table ()
2340 free (expr_hash_table);
2343 /* Compute the hash table for doing GCSE. */
2345 static void
2346 compute_expr_hash_table ()
2348 /* Initialize count of number of entries in hash table. */
2349 n_exprs = 0;
2350 memset ((char *) expr_hash_table, 0,
2351 expr_hash_table_size * sizeof (struct expr *));
2353 compute_hash_table (0);
2356 /* Expression tracking support. */
2358 /* Lookup pattern PAT in the expression table.
2359 The result is a pointer to the table entry, or NULL if not found. */
2361 static struct expr *
2362 lookup_expr (pat)
2363 rtx pat;
2365 int do_not_record_p;
2366 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2367 expr_hash_table_size);
2368 struct expr *expr;
2370 if (do_not_record_p)
2371 return NULL;
2373 expr = expr_hash_table[hash];
2375 while (expr && ! expr_equiv_p (expr->expr, pat))
2376 expr = expr->next_same_hash;
2378 return expr;
2381 /* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2382 matches it, otherwise return the first entry for REGNO. The result is a
2383 pointer to the table entry, or NULL if not found. */
2385 static struct expr *
2386 lookup_set (regno, pat)
2387 unsigned int regno;
2388 rtx pat;
2390 unsigned int hash = hash_set (regno, set_hash_table_size);
2391 struct expr *expr;
2393 expr = set_hash_table[hash];
2395 if (pat)
2397 while (expr && ! expr_equiv_p (expr->expr, pat))
2398 expr = expr->next_same_hash;
2400 else
2402 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2403 expr = expr->next_same_hash;
2406 return expr;
2409 /* Return the next entry for REGNO in list EXPR. */
2411 static struct expr *
2412 next_set (regno, expr)
2413 unsigned int regno;
2414 struct expr *expr;
2417 expr = expr->next_same_hash;
2418 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2420 return expr;
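/* A typical (sketched) use of lookup_set and next_set together: walk
   every recorded set of REGNO:

       for (expr = lookup_set (regno, NULL_RTX);
            expr != NULL;
            expr = next_set (regno, expr))
         ...examine SET_SRC (expr->expr)...  */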
2423 /* Reset tables used to keep track of what's still available [since the
2424 start of the block]. */
2426 static void
2427 reset_opr_set_tables ()
2429 /* Maintain a bitmap of which regs have been set since beginning of
2430 the block. */
2431 sbitmap_zero (reg_set_bitmap);
2433 /* Also keep a record of the last instruction to modify memory.
2434 For now this is very trivial, we only record whether any memory
2435 location has been modified. */
2436 mem_last_set = 0;
2439 /* Return non-zero if the operands of X are not set before INSN in
2440 INSN's basic block. */
2442 static int
2443 oprs_not_set_p (x, insn)
2444 rtx x, insn;
2446 int i, j;
2447 enum rtx_code code;
2448 const char *fmt;
2450 if (x == 0)
2451 return 1;
2453 code = GET_CODE (x);
2454 switch (code)
2456 case PC:
2457 case CC0:
2458 case CONST:
2459 case CONST_INT:
2460 case CONST_DOUBLE:
2461 case SYMBOL_REF:
2462 case LABEL_REF:
2463 case ADDR_VEC:
2464 case ADDR_DIFF_VEC:
2465 return 1;
2467 case MEM:
2468 if (mem_last_set != 0)
2469 return 0;
2470 else
2471 return oprs_not_set_p (XEXP (x, 0), insn);
2473 case REG:
2474 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2476 default:
2477 break;
2480 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2482 if (fmt[i] == 'e')
2484 /* If we are about to do the last recursive call
2485 needed at this level, change it into iteration.
2486 This function is called enough to be worth it. */
2487 if (i == 0)
2488 return oprs_not_set_p (XEXP (x, i), insn);
2490 if (! oprs_not_set_p (XEXP (x, i), insn))
2491 return 0;
2493 else if (fmt[i] == 'E')
2494 for (j = 0; j < XVECLEN (x, i); j++)
2495 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2496 return 0;
2499 return 1;
2502 /* Mark things set by a CALL. */
2504 static void
2505 mark_call (insn)
2506 rtx insn;
2508 mem_last_set = INSN_CUID (insn);
2511 /* Mark things set by a SET. */
2513 static void
2514 mark_set (pat, insn)
2515 rtx pat, insn;
2517 rtx dest = SET_DEST (pat);
2519 while (GET_CODE (dest) == SUBREG
2520 || GET_CODE (dest) == ZERO_EXTRACT
2521 || GET_CODE (dest) == SIGN_EXTRACT
2522 || GET_CODE (dest) == STRICT_LOW_PART)
2523 dest = XEXP (dest, 0);
2525 if (GET_CODE (dest) == REG)
2526 SET_BIT (reg_set_bitmap, REGNO (dest));
2527 else if (GET_CODE (dest) == MEM)
2528 mem_last_set = INSN_CUID (insn);
2530 if (GET_CODE (SET_SRC (pat)) == CALL)
2531 mark_call (insn);
2534 /* Record things set by a CLOBBER. */
2536 static void
2537 mark_clobber (pat, insn)
2538 rtx pat, insn;
2540 rtx clob = XEXP (pat, 0);
2542 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2543 clob = XEXP (clob, 0);
2545 if (GET_CODE (clob) == REG)
2546 SET_BIT (reg_set_bitmap, REGNO (clob));
2547 else
2548 mem_last_set = INSN_CUID (insn);
2551 /* Record things set by INSN.
2552 This data is used by oprs_not_set_p. */
2554 static void
2555 mark_oprs_set (insn)
2556 rtx insn;
2558 rtx pat = PATTERN (insn);
2559 int i;
2561 if (GET_CODE (pat) == SET)
2562 mark_set (pat, insn);
2563 else if (GET_CODE (pat) == PARALLEL)
2564 for (i = 0; i < XVECLEN (pat, 0); i++)
2566 rtx x = XVECEXP (pat, 0, i);
2568 if (GET_CODE (x) == SET)
2569 mark_set (x, insn);
2570 else if (GET_CODE (x) == CLOBBER)
2571 mark_clobber (x, insn);
2572 else if (GET_CODE (x) == CALL)
2573 mark_call (insn);
2576 else if (GET_CODE (pat) == CLOBBER)
2577 mark_clobber (pat, insn);
2578 else if (GET_CODE (pat) == CALL)
2579 mark_call (insn);
2583 /* Classic GCSE reaching definition support. */
2585 /* Allocate reaching def variables. */
2587 static void
2588 alloc_rd_mem (n_blocks, n_insns)
2589 int n_blocks, n_insns;
2591 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2592 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2594 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2595 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2597 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2598 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2600 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2601 sbitmap_vector_zero (rd_out, n_basic_blocks);
2604 /* Free reaching def variables. */
2606 static void
2607 free_rd_mem ()
2609 free (rd_kill);
2610 free (rd_gen);
2611 free (reaching_defs);
2612 free (rd_out);
2615 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2617 static void
2618 handle_rd_kill_set (insn, regno, bb)
2619 rtx insn;
2620 int regno, bb;
2622 struct reg_set *this_reg;
2624 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2625 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2626 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2629 /* Compute the set of kill's for reaching definitions. */
2631 static void
2632 compute_kill_rd ()
2634 int bb, cuid;
2635 int regno, i;
2637 /* For each block
2638 For each set bit in `gen' of the block (i.e each insn which
2639 generates a definition in the block)
2640 Call the reg set by the insn corresponding to that bit regx
2641 Look at the linked list starting at reg_set_table[regx]
2642 For each setting of regx in the linked list, which is not in
2643 this block
2644 Set the bit in `kill' corresponding to that insn. */
2645 for (bb = 0; bb < n_basic_blocks; bb++)
2646 for (cuid = 0; cuid < max_cuid; cuid++)
2647 if (TEST_BIT (rd_gen[bb], cuid))
2649 rtx insn = CUID_INSN (cuid);
2650 rtx pat = PATTERN (insn);
2652 if (GET_CODE (insn) == CALL_INSN)
2654 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2656 if ((call_used_regs[regno]
2657 && regno != STACK_POINTER_REGNUM
2658 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2659 && regno != HARD_FRAME_POINTER_REGNUM
2660 #endif
2661 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2662 && ! (regno == ARG_POINTER_REGNUM
2663 && fixed_regs[regno])
2664 #endif
2665 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2666 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2667 #endif
2668 && regno != FRAME_POINTER_REGNUM)
2669 || global_regs[regno])
2670 handle_rd_kill_set (insn, regno, bb);
2674 if (GET_CODE (pat) == PARALLEL)
2676 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2678 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2680 if ((code == SET || code == CLOBBER)
2681 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2682 handle_rd_kill_set (insn,
2683 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2684 bb);
2687 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2688 /* Each setting of this register outside of this block
2689 must be marked in the set of kills in this block. */
2690 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2694 /* Compute the reaching definitions as in
2695 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2696 Chapter 10. It is the same algorithm as used for computing available
2697 expressions but applied to the gens and kills of reaching definitions. */
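/* In equation form, the loop below computes, for each block BB,

       reaching_defs[BB] = union of rd_out[P] over all predecessors P
       rd_out[BB]        = rd_gen[BB] | (reaching_defs[BB] & ~rd_kill[BB])

   iterating until no rd_out changes (a standard forward dataflow
   fixed point).  */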
2699 static void
2700 compute_rd ()
2702 int bb, changed, passes;
2704 for (bb = 0; bb < n_basic_blocks; bb++)
2705 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2707 passes = 0;
2708 changed = 1;
2709 while (changed)
2711 changed = 0;
2712 for (bb = 0; bb < n_basic_blocks; bb++)
2714 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2715 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2716 reaching_defs[bb], rd_kill[bb]);
2718 passes++;
2721 if (gcse_file)
2722 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2725 /* Classic GCSE available expression support. */
2727 /* Allocate memory for available expression computation. */
2729 static void
2730 alloc_avail_expr_mem (n_blocks, n_exprs)
2731 int n_blocks, n_exprs;
2733 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2734 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2736 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2737 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2739 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2740 sbitmap_vector_zero (ae_in, n_basic_blocks);
2742 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2743 sbitmap_vector_zero (ae_out, n_basic_blocks);
2746 static void
2747 free_avail_expr_mem ()
2749 free (ae_kill);
2750 free (ae_gen);
2751 free (ae_in);
2752 free (ae_out);
2755 /* Compute the set of available expressions generated in each basic block. */
2757 static void
2758 compute_ae_gen ()
2760 unsigned int i;
2761 struct expr *expr;
2762 struct occr *occr;
2764 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2765 This is all we have to do because an expression is not recorded if it
2766 is not available, and the only expressions we want to work with are the
2767 ones that are recorded. */
2768 for (i = 0; i < expr_hash_table_size; i++)
2769 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2770 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2771 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2774 /* Return non-zero if expression X is killed in BB. */
2776 static int
2777 expr_killed_p (x, bb)
2778 rtx x;
2779 int bb;
2781 int i, j;
2782 enum rtx_code code;
2783 const char *fmt;
2785 if (x == 0)
2786 return 1;
2788 code = GET_CODE (x);
2789 switch (code)
2791 case REG:
2792 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2794 case MEM:
2795 if (mem_set_in_block[bb])
2796 return 1;
2797 else
2798 return expr_killed_p (XEXP (x, 0), bb);
2800 case PC:
2801 case CC0: /*FIXME*/
2802 case CONST:
2803 case CONST_INT:
2804 case CONST_DOUBLE:
2805 case SYMBOL_REF:
2806 case LABEL_REF:
2807 case ADDR_VEC:
2808 case ADDR_DIFF_VEC:
2809 return 0;
2811 default:
2812 break;
2815 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2817 if (fmt[i] == 'e')
2819 /* If we are about to do the last recursive call
2820 needed at this level, change it into iteration.
2821 This function is called enough to be worth it. */
2822 if (i == 0)
2823 return expr_killed_p (XEXP (x, i), bb);
2824 else if (expr_killed_p (XEXP (x, i), bb))
2825 return 1;
2827 else if (fmt[i] == 'E')
2828 for (j = 0; j < XVECLEN (x, i); j++)
2829 if (expr_killed_p (XVECEXP (x, i, j), bb))
2830 return 1;
2833 return 0;
2836 /* Compute the set of available expressions killed in each basic block. */
2838 static void
2839 compute_ae_kill (ae_gen, ae_kill)
2840 sbitmap *ae_gen, *ae_kill;
2842 int bb;
2843 unsigned int i;
2844 struct expr *expr;
2846 for (bb = 0; bb < n_basic_blocks; bb++)
2847 for (i = 0; i < expr_hash_table_size; i++)
2848 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2850 /* Skip EXPR if generated in this block. */
2851 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2852 continue;
2854 if (expr_killed_p (expr->expr, bb))
2855 SET_BIT (ae_kill[bb], expr->bitmap_index);
2859 /* Actually perform the Classic GCSE optimizations. */
2861 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2863 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2864 as a positive reach. We want to do this when there are two computations
2865 of the expression in the block.
2867 VISITED is a pointer to a working buffer for tracking which BB's have
2868 been visited; the expr_reaches_here_p wrapper allocates and frees it.
2870 We treat reaching expressions that go through blocks containing the same
2871 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2872 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2873 2 as not reaching. The intent is to improve the probability of finding
2874 only one reaching expression and to reduce register lifetimes by picking
2875 the closest such expression. */
2877 static int
2878 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2879 struct occr *occr;
2880 struct expr *expr;
2881 int bb;
2882 int check_self_loop;
2883 char *visited;
2885 edge pred;
2887 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2889 int pred_bb = pred->src->index;
2891 if (visited[pred_bb])
2892 /* This predecessor has already been visited. Nothing to do. */
2894 else if (pred_bb == bb)
2896 /* BB loops on itself. */
2897 if (check_self_loop
2898 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2899 && BLOCK_NUM (occr->insn) == pred_bb)
2900 return 1;
2902 visited[pred_bb] = 1;
2905 /* Ignore this predecessor if it kills the expression. */
2906 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2907 visited[pred_bb] = 1;
2909 /* Does this predecessor generate this expression? */
2910 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2912 /* Is this the occurrence we're looking for?
2913 Note that there's only one generating occurrence per block
2914 so we just need to check the block number. */
2915 if (BLOCK_NUM (occr->insn) == pred_bb)
2916 return 1;
2918 visited[pred_bb] = 1;
2921 /* Neither gen nor kill. */
2922 else
2924 visited[pred_bb] = 1;
2925 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2926 visited))
2928 return 1;
2932 /* All paths have been checked. */
2933 return 0;
2936 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2937 memory allocated for that function is returned. */
2939 static int
2940 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2941 struct occr *occr;
2942 struct expr *expr;
2943 int bb;
2944 int check_self_loop;
2946 int rval;
2947 char *visited = (char *) xcalloc (n_basic_blocks, 1);
2949 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2951 free (visited);
2952 return rval;
2955 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2956 If there is more than one such instruction, return NULL.
2958 Called only by handle_avail_expr. */
2960 static rtx
2961 computing_insn (expr, insn)
2962 struct expr *expr;
2963 rtx insn;
2965 int bb = BLOCK_NUM (insn);
2967 if (expr->avail_occr->next == NULL)
2969 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2970 /* The available expression is actually itself
2971 (i.e. a loop in the flow graph) so do nothing. */
2972 return NULL;
2974 /* (FIXME) This is the case where we found a pattern that was created
2975 by an earlier substitution. */
2976 return expr->avail_occr->insn;
2978 else
2980 /* Pattern is computed more than once.
2981 Search backwards from this insn to see how many of these
2982 computations actually reach this insn. */
2983 struct occr *occr;
2984 rtx insn_computes_expr = NULL;
2985 int can_reach = 0;
2987 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2989 if (BLOCK_NUM (occr->insn) == bb)
2991 /* The expression is generated in this block.
2992 The only time we care about this is when the expression
2993 is generated later in the block [and thus there's a loop].
2994 We let the normal cse pass handle the other cases. */
2995 if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2996 && expr_reaches_here_p (occr, expr, bb, 1))
2998 can_reach++;
2999 if (can_reach > 1)
3000 return NULL;
3002 insn_computes_expr = occr->insn;
3005 else if (expr_reaches_here_p (occr, expr, bb, 0))
3007 can_reach++;
3008 if (can_reach > 1)
3009 return NULL;
3011 insn_computes_expr = occr->insn;
3015 if (insn_computes_expr == NULL)
3016 abort ();
3018 return insn_computes_expr;
3022 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3023 Only called by can_disregard_other_sets. */
3025 static int
3026 def_reaches_here_p (insn, def_insn)
3027 rtx insn, def_insn;
3029 rtx reg;
3031 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3032 return 1;
3034 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3036 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3038 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3039 return 1;
3040 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3041 reg = XEXP (PATTERN (def_insn), 0);
3042 else if (GET_CODE (PATTERN (def_insn)) == SET)
3043 reg = SET_DEST (PATTERN (def_insn));
3044 else
3045 abort ();
3047 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3049 else
3050 return 0;
3053 return 0;
3056 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The
3057 value returned is the number of definitions that reach INSN. Returning a
3058 value of zero means that [maybe] more than one definition reaches INSN and
3059 the caller can't perform whatever optimization it is trying to do. I.e. it is
3060 always safe to return zero. */
3062 static int
3063 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3064 struct reg_set **addr_this_reg;
3065 rtx insn;
3066 int for_combine;
3068 int number_of_reaching_defs = 0;
3069 struct reg_set *this_reg;
3071 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3072 if (def_reaches_here_p (insn, this_reg->insn))
3074 number_of_reaching_defs++;
3075 /* Ignore parallels for now. */
3076 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3077 return 0;
3079 if (!for_combine
3080 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3081 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3082 SET_SRC (PATTERN (insn)))))
3083 /* A setting of the reg to a different value reaches INSN. */
3084 return 0;
3086 if (number_of_reaching_defs > 1)
3088 /* If, in this setting, the value the register is being set to is
3089 equal to the previous value the register was set to, and this
3090 setting reaches the insn we are trying to do the substitution
3091 on, then we are OK. */
3092 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3093 return 0;
3094 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3095 SET_SRC (PATTERN (insn))))
3096 return 0;
3099 *addr_this_reg = this_reg;
3102 return number_of_reaching_defs;
3105 /* Expression computed by insn is available and the substitution is legal,
3106 so try to perform the substitution.
3108 The result is non-zero if any changes were made. */
3110 static int
3111 handle_avail_expr (insn, expr)
3112 rtx insn;
3113 struct expr *expr;
3115 rtx pat, insn_computes_expr;
3116 rtx to;
3117 struct reg_set *this_reg;
3118 int found_setting, use_src;
3119 int changed = 0;
3121 /* We only handle the case where one computation of the expression
3122 reaches this instruction. */
3123 insn_computes_expr = computing_insn (expr, insn);
3124 if (insn_computes_expr == NULL)
3125 return 0;
3127 found_setting = 0;
3128 use_src = 0;
3130 /* At this point we know only one computation of EXPR outside of this
3131 block reaches this insn. Now try to find a register that the
3132 expression is computed into. */
3133 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3135 /* This is the case when the available expression that reaches
3136 here has already been handled as an available expression. */
3137 unsigned int regnum_for_replacing
3138 = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3140 /* If the register was created by GCSE we can't use `reg_set_table',
3141 however we know it's set only once. */
3142 if (regnum_for_replacing >= max_gcse_regno
3143 /* If the register the expression is computed into is set only once,
3144 or only one set reaches this insn, we can use it. */
3145 || (((this_reg = reg_set_table[regnum_for_replacing]),
3146 this_reg->next == NULL)
3147 || can_disregard_other_sets (&this_reg, insn, 0)))
3149 use_src = 1;
3150 found_setting = 1;
3154 if (!found_setting)
3156 unsigned int regnum_for_replacing
3157 = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3159 /* This shouldn't happen. */
3160 if (regnum_for_replacing >= max_gcse_regno)
3161 abort ();
3163 this_reg = reg_set_table[regnum_for_replacing];
3165 /* If the register the expression is computed into is set only once,
3166 or only one set reaches this insn, use it. */
3167 if (this_reg->next == NULL
3168 || can_disregard_other_sets (&this_reg, insn, 0))
3169 found_setting = 1;
3172 if (found_setting)
3174 pat = PATTERN (insn);
3175 if (use_src)
3176 to = SET_SRC (PATTERN (insn_computes_expr));
3177 else
3178 to = SET_DEST (PATTERN (insn_computes_expr));
3179 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3181 /* We should be able to ignore the return code from validate_change but
3182 to play it safe we check. */
3183 if (changed)
3185 gcse_subst_count++;
3186 if (gcse_file != NULL)
3188 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3189 INSN_UID (insn));
3190 fprintf (gcse_file, " reg %d %s insn %d\n",
3191 REGNO (to), use_src ? "from" : "set in",
3192 INSN_UID (insn_computes_expr));
3197 /* The register that the expr is computed into is set more than once. */
3198 else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/)
3200 /* Insert an insn after the computing insn that copies the register
3201 it sets (call it REGB) into a new pseudo register REGN, and then
3202 replace the use of REGB in INSN with REGN. */
3204 rtx new_insn;
3206 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3208 /* Generate the new insn. */
3209 /* ??? If the change fails, we return 0, even though we created
3210 an insn. I think this is ok. */
3211 new_insn
3212 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3213 SET_DEST (PATTERN
3214 (insn_computes_expr))),
3215 insn_computes_expr);
3217 /* Keep block number table up to date. */
3218 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3220 /* Keep register set table up to date. */
3221 record_one_set (REGNO (to), new_insn);
3223 gcse_create_count++;
3224 if (gcse_file != NULL)
3226 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3227 INSN_UID (NEXT_INSN (insn_computes_expr)),
3228 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3229 fprintf (gcse_file, ", computed in insn %d,\n",
3230 INSN_UID (insn_computes_expr));
3231 fprintf (gcse_file, " into newly allocated reg %d\n",
3232 REGNO (to));
3235 pat = PATTERN (insn);
3237 /* Do register replacement for INSN. */
3238 changed = validate_change (insn, &SET_SRC (pat),
3239 SET_DEST (PATTERN
3240 (NEXT_INSN (insn_computes_expr))),
3243 /* We should be able to ignore the return code from validate_change but
3244 to play it safe we check. */
3245 if (changed)
3247 gcse_subst_count++;
3248 if (gcse_file != NULL)
3250 fprintf (gcse_file,
3251 "GCSE: Replacing the source in insn %d with reg %d ",
3252 INSN_UID (insn),
3253 REGNO (SET_DEST (PATTERN (NEXT_INSN
3254 (insn_computes_expr)))));
3255 fprintf (gcse_file, "set in insn %d\n",
3256 INSN_UID (insn_computes_expr));
3261 return changed;
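/* A sketch of the copy-insertion case above (register numbers made
   up): given

       insn A:  (set (reg 70) (plus (reg 58) (reg 59)))   ; computes EXPR
       ...                      ; (reg 70) is set again somewhere
       insn B:  (set (reg 72) (plus (reg 58) (reg 59)))

   we emit (set (reg 99) (reg 70)) right after insn A, where (reg 99)
   is a fresh pseudo, and then turn insn B into
   (set (reg 72) (reg 99)).  */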
3264 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3265 the dataflow analysis has been done.
3267 The result is non-zero if a change was made. */
3269 static int
3270 classic_gcse ()
3272 int bb, changed;
3273 rtx insn;
3275 /* Note we start at block 1. */
3277 changed = 0;
3278 for (bb = 1; bb < n_basic_blocks; bb++)
3280 /* Reset tables used to keep track of what's still valid [since the
3281 start of the block]. */
3282 reset_opr_set_tables ();
3284 for (insn = BLOCK_HEAD (bb);
3285 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3286 insn = NEXT_INSN (insn))
3288 /* Is insn of form (set (pseudo-reg) ...)? */
3289 if (GET_CODE (insn) == INSN
3290 && GET_CODE (PATTERN (insn)) == SET
3291 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3292 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3294 rtx pat = PATTERN (insn);
3295 rtx src = SET_SRC (pat);
3296 struct expr *expr;
3298 if (want_to_gcse_p (src)
3299 /* Is the expression recorded? */
3300 && ((expr = lookup_expr (src)) != NULL)
3301 /* Is the expression available [at the start of the
3302 block]? */
3303 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3304 /* Are the operands unchanged since the start of the
3305 block? */
3306 && oprs_not_set_p (src, insn))
3307 changed |= handle_avail_expr (insn, expr);
3310 /* Keep track of everything modified by this insn. */
3311 /* ??? Need to be careful w.r.t. mods done to INSN. */
3312 if (INSN_P (insn))
3313 mark_oprs_set (insn);
3317 return changed;
3320 /* Top level routine to perform one classic GCSE pass.
3322 Return non-zero if a change was made. */
3324 static int
3325 one_classic_gcse_pass (pass)
3326 int pass;
3328 int changed = 0;
3330 gcse_subst_count = 0;
3331 gcse_create_count = 0;
3333 alloc_expr_hash_table (max_cuid);
3334 alloc_rd_mem (n_basic_blocks, max_cuid);
3335 compute_expr_hash_table ();
3336 if (gcse_file)
3337 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3338 expr_hash_table_size, n_exprs);
3340 if (n_exprs > 0)
3342 compute_kill_rd ();
3343 compute_rd ();
3344 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3345 compute_ae_gen ();
3346 compute_ae_kill (ae_gen, ae_kill);
3347 compute_available (ae_gen, ae_kill, ae_out, ae_in);
3348 changed = classic_gcse ();
3349 free_avail_expr_mem ();
3352 free_rd_mem ();
3353 free_expr_hash_table ();
3355 if (gcse_file)
3357 fprintf (gcse_file, "\n");
3358 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3359 current_function_name, pass, bytes_used, gcse_subst_count);
3360 fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3363 return changed;
3366 /* Compute copy/constant propagation working variables. */
3368 /* Local properties of assignments. */
3369 static sbitmap *cprop_pavloc;
3370 static sbitmap *cprop_absaltered;
3372 /* Global properties of assignments (computed from the local properties). */
3373 static sbitmap *cprop_avin;
3374 static sbitmap *cprop_avout;
3376 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3377 basic blocks. N_SETS is the number of sets. */
3379 static void
3380 alloc_cprop_mem (n_blocks, n_sets)
3381 int n_blocks, n_sets;
3383 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3384 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3386 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3387 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3390 /* Free vars used by copy/const propagation. */
3392 static void
3393 free_cprop_mem ()
3395 free (cprop_pavloc);
3396 free (cprop_absaltered);
3397 free (cprop_avin);
3398 free (cprop_avout);
3401 /* For each block, compute whether X is transparent. X is either an
3402 expression or an assignment [though we don't care which, for this context
3403 an assignment is treated as an expression]. For each block where an
3404 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3405 bit in BMAP. */
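/* A sketch: if X contains the pseudo (reg 70), then for every insn
   recorded in reg_set_table[70] the bit INDX is set (SET_P == 1) or
   reset (SET_P == 0) in the bitmap of that insn's block; a MEM in X
   does the same for every block whose mem_set_in_block entry is
   nonzero.  */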
3407 static void
3408 compute_transp (x, indx, bmap, set_p)
3409 rtx x;
3410 int indx;
3411 sbitmap *bmap;
3412 int set_p;
3414 int bb, i, j;
3415 enum rtx_code code;
3416 reg_set *r;
3417 const char *fmt;
3419 /* repeat is used to turn tail-recursion into iteration since GCC
3420 can't do it when there's no return value. */
3421 repeat:
3423 if (x == 0)
3424 return;
3426 code = GET_CODE (x);
3427 switch (code)
3429 case REG:
3430 if (set_p)
3432 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3434 for (bb = 0; bb < n_basic_blocks; bb++)
3435 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3436 SET_BIT (bmap[bb], indx);
3438 else
3440 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3441 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3444 else
3446 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3448 for (bb = 0; bb < n_basic_blocks; bb++)
3449 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3450 RESET_BIT (bmap[bb], indx);
3452 else
3454 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3455 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3459 return;
3461 case MEM:
3462 if (set_p)
3464 for (bb = 0; bb < n_basic_blocks; bb++)
3465 if (mem_set_in_block[bb])
3466 SET_BIT (bmap[bb], indx);
3468 else
3470 for (bb = 0; bb < n_basic_blocks; bb++)
3471 if (mem_set_in_block[bb])
3472 RESET_BIT (bmap[bb], indx);
3475 x = XEXP (x, 0);
3476 goto repeat;
3478 case PC:
3479 case CC0: /*FIXME*/
3480 case CONST:
3481 case CONST_INT:
3482 case CONST_DOUBLE:
3483 case SYMBOL_REF:
3484 case LABEL_REF:
3485 case ADDR_VEC:
3486 case ADDR_DIFF_VEC:
3487 return;
3489 default:
3490 break;
3493 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3495 if (fmt[i] == 'e')
3497 /* If we are about to do the last recursive call
3498 needed at this level, change it into iteration.
3499 This function is called enough to be worth it. */
3500 if (i == 0)
3502 x = XEXP (x, i);
3503 goto repeat;
3506 compute_transp (XEXP (x, i), indx, bmap, set_p);
3508 else if (fmt[i] == 'E')
3509 for (j = 0; j < XVECLEN (x, i); j++)
3510 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3514 /* Top level routine to do the dataflow analysis needed by copy/const
3515 propagation. */
3517 static void
3518 compute_cprop_data ()
3520 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3521 compute_available (cprop_pavloc, cprop_absaltered,
3522 cprop_avout, cprop_avin);
3525 /* Copy/constant propagation. */
3527 /* Maximum number of register uses in an insn that we handle. */
3528 #define MAX_USES 8
3530 /* Table of uses found in an insn.
3531 Allocated statically to avoid alloc/free complexity and overhead. */
3532 static struct reg_use reg_use_table[MAX_USES];
3534 /* Index into `reg_use_table' while building it. */
3535 static int reg_use_count;
3537 /* Set up a list of register numbers used in INSN. The found uses are stored
3538 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3539 and contains the number of uses in the table upon exit.
3541 ??? If a register appears multiple times we will record it multiple times.
3542 This doesn't hurt anything but it will slow things down. */
3544 static void
3545 find_used_regs (x)
3546 rtx x;
3548 int i, j;
3549 enum rtx_code code;
3550 const char *fmt;
3552 /* repeat is used to turn tail-recursion into iteration since GCC
3553 can't do it when there's no return value. */
3554 repeat:
3556 if (x == 0)
3557 return;
3559 code = GET_CODE (x);
3560 switch (code)
3562 case REG:
3563 if (reg_use_count == MAX_USES)
3564 return;
3566 reg_use_table[reg_use_count].reg_rtx = x;
3567 reg_use_count++;
3568 return;
3570 case MEM:
3571 x = XEXP (x, 0);
3572 goto repeat;
3574 case PC:
3575 case CC0:
3576 case CONST:
3577 case CONST_INT:
3578 case CONST_DOUBLE:
3579 case SYMBOL_REF:
3580 case LABEL_REF:
3581 case CLOBBER:
3582 case ADDR_VEC:
3583 case ADDR_DIFF_VEC:
3584 case ASM_INPUT: /*FIXME*/
3585 return;
3587 case SET:
3588 if (GET_CODE (SET_DEST (x)) == MEM)
3589 find_used_regs (SET_DEST (x));
3590 x = SET_SRC (x);
3591 goto repeat;
3593 default:
3594 break;
3597 /* Recursively scan the operands of this expression. */
3599 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3601 if (fmt[i] == 'e')
3603 /* If we are about to do the last recursive call
3604 needed at this level, change it into iteration.
3605 This function is called enough to be worth it. */
3606 if (i == 0)
3608 x = XEXP (x, 0);
3609 goto repeat;
3612 find_used_regs (XEXP (x, i));
3614 else if (fmt[i] == 'E')
3615 for (j = 0; j < XVECLEN (x, i); j++)
3616 find_used_regs (XVECEXP (x, i, j));
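/* Typical (sketched) use, mirroring cprop_insn below:

       reg_use_count = 0;
       find_used_regs (PATTERN (insn));
       for (reg_used = &reg_use_table[0]; reg_use_count > 0;
            reg_used++, reg_use_count--)
         ...examine reg_used->reg_rtx...  */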
3620 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3621 Returns non-zero if successful. */
3623 static int
3624 try_replace_reg (from, to, insn)
3625 rtx from, to, insn;
3627 rtx note;
3628 rtx src;
3629 int success;
3630 rtx set;
3632 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3634 if (!note)
3635 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3637 /* If this fails we could try to simplify the result of the
3638 replacement and attempt to recognize the simplified insn.
3640 But we need a general simplify_rtx that doesn't have pass
3641 specific state variables. I'm not aware of one at the moment. */
3643 success = validate_replace_src (from, to, insn);
3644 set = single_set (insn);
3646 /* We've failed to do the replacement. Try to add a REG_EQUAL note so
3647 that we don't lose the information. */
3648 if (!success && !note)
3650 if (!set)
3651 return 0;
3653 note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3654 copy_rtx (SET_SRC (set)),
3655 REG_NOTES (insn));
3658 /* Always do the replacement in REG_EQUAL and REG_EQUIV notes. Also
3659 try to simplify them. */
3660 if (note)
3662 rtx simplified;
3664 if (!validate_replace_rtx_subexp (from, to, insn, &XEXP (note, 0)))
3665 abort();
3667 src = XEXP (note, 0);
3669 /* Try to simplify resulting note. */
3670 simplified = simplify_rtx (src);
3671 if (simplified)
3673 src = simplified;
3674 XEXP (note, 0) = src;
3677 /* The REG_EQUAL note may get simplified into a plain register.
3678 We don't allow that; remove the note. This case ought not to
3679 happen, because the code above ought to synthesize a reg-reg
3680 move, but be on the safe side. */
3681 else if (REG_P (src))
3682 remove_note (insn, note);
3684 return success;
3687 /* Find a set of register REGNO that is available on entry to INSN's block.
3688 Returns NULL if no such set is found. */
3690 static struct expr *
3691 find_avail_set (regno, insn)
3692 int regno;
3693 rtx insn;
3695 /* SET1 contains the last set found that can be returned to the caller for
3696 use in a substitution. */
3697 struct expr *set1 = 0;
3699 /* Loops are not possible here. To get a loop we would need two sets
3700 available at the start of the block containing INSN. I.e. we would
3701 need two sets like this available at the start of the block:
3703 (set (reg X) (reg Y))
3704 (set (reg Y) (reg X))
3706 This cannot happen since the set of (reg Y) would have killed the
3707 set of (reg X) making it unavailable at the start of this block. */
3708 while (1)
3710 rtx src;
3711 struct expr *set = lookup_set (regno, NULL_RTX);
3713 /* Find a set that is available at the start of the block
3714 which contains INSN. */
3715 while (set)
3717 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3718 break;
3719 set = next_set (regno, set);
3722 /* If no available set was found we've reached the end of the
3723 (possibly empty) copy chain. */
3724 if (set == 0)
3725 break;
3727 if (GET_CODE (set->expr) != SET)
3728 abort ();
3730 src = SET_SRC (set->expr);
3732 /* We know the set is available.
3733 Now check that SRC is ANTLOC (i.e. none of the source operands
3734 have changed since the start of the block).
3736 If the source operand changed, we may still use it for the next
3737 iteration of this loop, but we may not use it for substitutions. */
3739 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3740 set1 = set;
3742 /* If the source of the set is anything except a register, then
3743 we have reached the end of the copy chain. */
3744 if (GET_CODE (src) != REG)
3745 break;
3747 /* Follow the copy chain, ie start another iteration of the loop
3748 and see if we have an available copy into SRC. */
3749 regno = REGNO (src);
3752 /* SET1 holds the last set that was available and anticipatable at
3753 INSN. */
3754 return set1;
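/* A sketch of the chain walk (register numbers made up): if
   (set (reg 72) (reg 71)) and (set (reg 71) (reg 70)) are both
   available on entry to INSN's block with unchanged sources, a query
   for register 72 follows 72 -> 71 -> 70 and returns the
   (set (reg 71) (reg 70)) entry, so a use of (reg 72) can be replaced
   by (reg 70).  */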
3757 /* Subroutine of cprop_insn that tries to propagate constants into
3758 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3759 that we can use for substitutions.
3760 REG_USED is the use we will try to replace, SRC is the constant we
3761 will try to substitute for it.
3762 Returns nonzero if a change was made. */
3764 static int
3765 cprop_jump (insn, copy, reg_used, src)
3766 rtx insn, copy;
3767 struct reg_use *reg_used;
3768 rtx src;
3770 rtx set = PATTERN (copy);
3771 rtx temp;
3773 /* Replace the register with the appropriate constant. */
3774 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3776 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3777 GET_MODE (SET_SRC (set)),
3778 GET_MODE (XEXP (SET_SRC (set), 0)),
3779 XEXP (SET_SRC (set), 0),
3780 XEXP (SET_SRC (set), 1),
3781 XEXP (SET_SRC (set), 2));
3783 /* If no simplification can be made, then try the next
3784 register. */
3785 if (temp == 0)
3786 return 0;
3788 SET_SRC (set) = temp;
3790 /* That may have changed the structure of TEMP, so
3791 force it to be rerecognized if it has not turned
3792 into a nop or unconditional jump. */
3794 INSN_CODE (copy) = -1;
3795 if ((SET_DEST (set) == pc_rtx
3796 && (SET_SRC (set) == pc_rtx
3797 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3798 || recog (PATTERN (copy), copy, NULL) >= 0)
3800 /* This has either become an unconditional jump
3801 or a nop-jump. We'd like to delete nop jumps
3802 here, but doing so confuses gcse. So we just
3803 make the replacement and let later passes
3804 sort things out. */
3805 PATTERN (insn) = set;
3806 INSN_CODE (insn) = -1;
3808 /* One less use of the label this insn used to jump to
3809 if we turned this into a NOP jump. */
3810 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3811 --LABEL_NUSES (JUMP_LABEL (insn));
3813 /* If this has turned into an unconditional jump,
3814 then put a barrier after it so that the unreachable
3815 code will be deleted. */
3816 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3817 emit_barrier_after (insn);
3819 run_jump_opt_after_gcse = 1;
3821 const_prop_count++;
3822 if (gcse_file != NULL)
3824 fprintf (gcse_file,
3825 "CONST-PROP: Replacing reg %d in insn %d with constant ",
3826 REGNO (reg_used->reg_rtx), INSN_UID (insn));
3827 print_rtl (gcse_file, src);
3828 fprintf (gcse_file, "\n");
3831 return 1;
3833 return 0;
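/* A sketch of the intended effect: after substituting a constant for
   the register in something like

       (set (pc) (if_then_else (eq (reg 70) (const_int 0))
                               (label_ref 23)
                               (pc)))

   the IF_THEN_ELSE may simplify to one of its arms, leaving either a
   nop jump (SET_SRC == pc_rtx) or an unconditional jump to the label,
   which the code above then installs in INSN.  */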
3836 #ifdef HAVE_cc0
3838 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3839 for machines that have CC0. INSN is a single set that stores into CC0;
3840 the insn following it is a conditional jump. REG_USED is the use we will
3841 try to replace, SRC is the constant we will try to substitute for it.
3842 Returns nonzero if a change was made. */
3844 static int
3845 cprop_cc0_jump (insn, reg_used, src)
3846 rtx insn;
3847 struct reg_use *reg_used;
3848 rtx src;
3850 rtx jump = NEXT_INSN (insn);
3851 rtx copy = copy_rtx (jump);
3852 rtx set = PATTERN (copy);
3854 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3855 substitute into it. */
3856 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3857 if (! cprop_jump (jump, copy, reg_used, src))
3858 return 0;
3860 /* If we succeeded, delete the cc0 setter. */
3861 PUT_CODE (insn, NOTE);
3862 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3863 NOTE_SOURCE_FILE (insn) = 0;
3864 return 1;
3866 #endif
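/* A sketch of the pattern pair handled above (illustrative):

       (set (cc0) (compare (reg 70) (const_int 0)))              ; INSN
       (set (pc) (if_then_else (eq (cc0) (const_int 0)) ...))    ; the jump

   The compare's source replaces cc0 in a copy of the jump so that
   cprop_jump sees a self-contained condition.  */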
3868 /* Perform constant and copy propagation on INSN.
3869 The result is non-zero if a change was made. */
3871 static int
3872 cprop_insn (insn, alter_jumps)
3873 rtx insn;
3874 int alter_jumps;
3876 struct reg_use *reg_used;
3877 int changed = 0;
3878 rtx note;
3880 /* Only propagate into SETs. Note that a conditional jump is a
3881 SET with pc_rtx as the destination. */
3882 if ((GET_CODE (insn) != INSN
3883 && GET_CODE (insn) != JUMP_INSN)
3884 || GET_CODE (PATTERN (insn)) != SET)
3885 return 0;
3887 reg_use_count = 0;
3888 find_used_regs (PATTERN (insn));
3890 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3891 if (!note)
3892 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3894 /* We may win even when propagating constants into notes. */
3895 if (note)
3896 find_used_regs (XEXP (note, 0));
3898 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3899 reg_used++, reg_use_count--)
3901 unsigned int regno = REGNO (reg_used->reg_rtx);
3902 rtx pat, src;
3903 struct expr *set;
3905 /* Ignore registers created by GCSE; the analysis tables only
3906 cover registers numbered below max_gcse_regno. */
3907 if (regno >= max_gcse_regno)
3908 continue;
3910 /* If the register has already been set in this block, there's
3911 nothing we can do. */
3912 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3913 continue;
3915 /* Find an assignment that sets reg_used and is available
3916 at the start of the block. */
3917 set = find_avail_set (regno, insn);
3918 if (! set)
3919 continue;
3921 pat = set->expr;
3922 /* ??? We might be able to handle PARALLELs. Later. */
3923 if (GET_CODE (pat) != SET)
3924 abort ();
3926 src = SET_SRC (pat);
3928 /* Constant propagation. */
3929 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3930 || GET_CODE (src) == SYMBOL_REF)
3932 /* Handle normal insns first. */
3933 if (GET_CODE (insn) == INSN
3934 && try_replace_reg (reg_used->reg_rtx, src, insn))
3936 changed = 1;
3937 const_prop_count++;
3938 if (gcse_file != NULL)
3940 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3941 regno);
3942 fprintf (gcse_file, "insn %d with constant ",
3943 INSN_UID (insn));
3944 print_rtl (gcse_file, src);
3945 fprintf (gcse_file, "\n");
3948 /* The original insn setting reg_used may or may not now be
3949 deletable. We leave the deletion to flow. */
3952 /* Try to propagate a CONST_INT into a conditional jump.
3953 We're pretty specific about what we will handle in this
3954 code; we can extend this as necessary over time.
3956 Right now the insn in question must look like
3957 (set (pc) (if_then_else ...)) */
3958 else if (alter_jumps
3959 && GET_CODE (insn) == JUMP_INSN
3960 && condjump_p (insn)
3961 && ! simplejump_p (insn))
3962 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3963 #ifdef HAVE_cc0
3964 /* Similar code for machines that use a pair of CC0 setter and
3965 conditional jump insns. */
3966 else if (alter_jumps
3967 && GET_CODE (PATTERN (insn)) == SET
3968 && SET_DEST (PATTERN (insn)) == cc0_rtx
3969 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3970 && condjump_p (NEXT_INSN (insn))
3971 && ! simplejump_p (NEXT_INSN (insn)))
3973 if (cprop_cc0_jump (insn, reg_used, src))
3975 changed = 1;
3976 break;
3979 #endif
3981 else if (GET_CODE (src) == REG
3982 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3983 && REGNO (src) != regno)
3985 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3987 changed = 1;
3988 copy_prop_count++;
3989 if (gcse_file != NULL)
3991 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3992 regno, INSN_UID (insn));
3993 fprintf (gcse_file, " with reg %d\n", REGNO (src));
3996 /* The original insn setting reg_used may or may not now be
3997 deletable. We leave the deletion to flow. */
3998 /* FIXME: If it turns out that the insn isn't deletable,
3999 then we may have unnecessarily extended register lifetimes
4000 and made things worse. */
4005 return changed;
4008 /* Forward propagate copies. This includes copies and constants. Return
4009 non-zero if a change was made. */
4011 static int
4012 cprop (alter_jumps)
4013 int alter_jumps;
4015 int bb, changed;
4016 rtx insn;
4018 /* Note we start at block 1. */
4020 changed = 0;
4021 for (bb = 1; bb < n_basic_blocks; bb++)
4023 /* Reset tables used to keep track of what's still valid [since the
4024 start of the block]. */
4025 reset_opr_set_tables ();
4027 for (insn = BLOCK_HEAD (bb);
4028 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4029 insn = NEXT_INSN (insn))
4031 if (INSN_P (insn))
4033 changed |= cprop_insn (insn, alter_jumps);
4035 /* Keep track of everything modified by this insn. */
4036 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4037 call mark_oprs_set if we turned the insn into a NOTE. */
4038 if (GET_CODE (insn) != NOTE)
4039 mark_oprs_set (insn);
4044 if (gcse_file != NULL)
4045 fprintf (gcse_file, "\n");
4047 return changed;
4050 /* Perform one copy/constant propagation pass.
4051 F is the first insn in the function.
4052 PASS is the pass count. */
4054 static int
4055 one_cprop_pass (pass, alter_jumps)
4056 int pass;
4057 int alter_jumps;
4059 int changed = 0;
4061 const_prop_count = 0;
4062 copy_prop_count = 0;
4064 alloc_set_hash_table (max_cuid);
4065 compute_set_hash_table ();
4066 if (gcse_file)
4067 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4068 n_sets);
4069 if (n_sets > 0)
4071 alloc_cprop_mem (n_basic_blocks, n_sets);
4072 compute_cprop_data ();
4073 changed = cprop (alter_jumps);
4074 free_cprop_mem ();
4077 free_set_hash_table ();
4079 if (gcse_file)
4081 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4082 current_function_name, pass, bytes_used);
4083 fprintf (gcse_file, "%d const props, %d copy props\n\n",
4084 const_prop_count, copy_prop_count);
4087 return changed;
4090 /* Compute PRE+LCM working variables. */
4092 /* Local properties of expressions. */
4093 /* Nonzero for expressions that are transparent in the block. */
4094 static sbitmap *transp;
4096 /* Nonzero for expressions that are transparent at the end of the block.
4097 This is only zero for expressions killed by an abnormal critical edge
4098 created by a call. */
4099 static sbitmap *transpout;
4101 /* Nonzero for expressions that are computed (available) in the block. */
4102 static sbitmap *comp;
4104 /* Nonzero for expressions that are locally anticipatable in the block. */
4105 static sbitmap *antloc;
4107 /* Nonzero for expressions where this block is an optimal computation
4108 point. */
4109 static sbitmap *pre_optimal;
4111 /* Nonzero for expressions which are redundant in a particular block. */
4112 static sbitmap *pre_redundant;
4114 /* Nonzero for expressions which should be inserted on a specific edge. */
4115 static sbitmap *pre_insert_map;
4117 /* Nonzero for expressions which should be deleted in a specific block. */
4118 static sbitmap *pre_delete_map;
4120 /* Contains the edge_list returned by pre_edge_lcm. */
4121 static struct edge_list *edge_list;
4123 /* Redundant insns. */
4124 static sbitmap pre_redundant_insns;
4126 /* Allocate vars used for PRE analysis. */
4128 static void
4129 alloc_pre_mem (n_blocks, n_exprs)
4130 int n_blocks, n_exprs;
4132 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4133 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4134 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4136 pre_optimal = NULL;
4137 pre_redundant = NULL;
4138 pre_insert_map = NULL;
4139 pre_delete_map = NULL;
4140 ae_in = NULL;
4141 ae_out = NULL;
4142 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4144 /* pre_insert_map and pre_delete_map are allocated later. */
4147 /* Free vars used for PRE analysis. */
4149 static void
4150 free_pre_mem ()
4152 free (transp);
4153 free (comp);
4155 /* ANTLOC and AE_KILL are freed just after pre_edge_lcm finishes. */
4157 if (pre_optimal)
4158 free (pre_optimal);
4159 if (pre_redundant)
4160 free (pre_redundant);
4161 if (pre_insert_map)
4162 free (pre_insert_map);
4163 if (pre_delete_map)
4164 free (pre_delete_map);
4166 if (ae_in)
4167 free (ae_in);
4168 if (ae_out)
4169 free (ae_out);
4171 transp = comp = NULL;
4172 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4173 ae_in = ae_out = NULL;
4176 /* Top level routine to do the dataflow analysis needed by PRE. */
4178 static void
4179 compute_pre_data ()
4181 sbitmap trapping_expr;
4182 int i;
4183 unsigned int ui;
4185 compute_local_properties (transp, comp, antloc, 0);
4186 sbitmap_vector_zero (ae_kill, n_basic_blocks);
4188 /* Collect expressions which might trap. */
4189 trapping_expr = sbitmap_alloc (n_exprs);
4190 sbitmap_zero (trapping_expr);
4191 for (ui = 0; ui < expr_hash_table_size; ui++)
4193 struct expr *e;
4194 for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
4195 if (may_trap_p (e->expr))
4196 SET_BIT (trapping_expr, e->bitmap_index);
4199 /* Compute ae_kill for each basic block using:
4201 ~(TRANSP | COMP)
4203 This is significantly faster than compute_ae_kill. */
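/* To see why this computes the kill set: an expression is available at
   the end of a block iff it is computed there or passes through the
   block unchanged, so it is killed exactly when it is neither computed
   nor transparent.  */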
4205 for (i = 0; i < n_basic_blocks; i++)
4207 edge e;
4209 /* If the current block is the destination of an abnormal edge, we
4210 kill all trapping expressions because we won't be able to properly
4211 place the instruction on the edge. So make them neither
4212 anticipatable nor transparent. This is fairly conservative. */
4213 for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
4214 if (e->flags & EDGE_ABNORMAL)
4216 sbitmap_difference (antloc[i], antloc[i], trapping_expr);
4217 sbitmap_difference (transp[i], transp[i], trapping_expr);
4218 break;
4221 sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4222 sbitmap_not (ae_kill[i], ae_kill[i]);
4225 edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4226 ae_kill, &pre_insert_map, &pre_delete_map);
4227 free (antloc);
4228 antloc = NULL;
4229 free (ae_kill);
4230 ae_kill = NULL;
4231 free (trapping_expr);
4234 /* PRE utilities */
4236 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4237 block BB.
4239 VISITED is a pointer to a working buffer for tracking which BB's have
4240 been visited. It is NULL for the top-level call.
4242 We treat reaching expressions that go through blocks containing the same
4243 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4244 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4245 2 as not reaching. The intent is to improve the probability of finding
4246 only one reaching expression and to reduce register lifetimes by picking
4247 the closest such expression. */
4249 static int
4250 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4251 int occr_bb;
4252 struct expr *expr;
4253 int bb;
4254 char *visited;
4256 edge pred;
4258 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4260 int pred_bb = pred->src->index;
4262 if (pred->src == ENTRY_BLOCK_PTR
4263 /* Has this predecessor already been visited? */
4264 || visited[pred_bb])
4265 ;/* Nothing to do. */
4267 /* Does this predecessor generate this expression? */
4268 else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4270 /* Is this the occurrence we're looking for?
4271 Note that there's only one generating occurrence per block
4272 so we just need to check the block number. */
4273 if (occr_bb == pred_bb)
4274 return 1;
4276 visited[pred_bb] = 1;
4278 /* Ignore this predecessor if it kills the expression. */
4279 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4280 visited[pred_bb] = 1;
4282 /* Neither gen nor kill. */
4283 else
4285 visited[pred_bb] = 1;
4286 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4287 return 1;
4291 /* All paths have been checked. */
4292 return 0;
4295 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
4296 memory allocated for that function is returned. */
4298 static int
4299 pre_expr_reaches_here_p (occr_bb, expr, bb)
4300 int occr_bb;
4301 struct expr *expr;
4302 int bb;
4304 int rval;
4305 char *visited = (char *) xcalloc (n_basic_blocks, 1);
4307 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
4309 free (visited);
4310 return rval;
4314 /* Given an expr, generate RTL which we can insert at the end of a BB,
4315 or on an edge. The caller is responsible for recording the block
4316 number of any insns generated. */
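/* Note that emit_move_insn may expand the copy into several insns on
   some machines, in which case the pattern returned is a SEQUENCE;
   callers such as insert_insn_end_bb must handle both cases.  */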
4318 static rtx
4319 process_insert_insn (expr)
4320 struct expr *expr;
4322 rtx reg = expr->reaching_reg;
4323 rtx pat, copied_expr;
4324 rtx first_new_insn;
4326 start_sequence ();
4327 copied_expr = copy_rtx (expr->expr);
4328 emit_move_insn (reg, copied_expr);
4329 first_new_insn = get_insns ();
4330 pat = gen_sequence ();
4331 end_sequence ();
4333 return pat;
4336 /* Add EXPR to the end of basic block BB.
4338 This is used by both PRE and code hoisting.
4340 For PRE, we want to verify that the expr is either transparent
4341 or locally anticipatable in the target block. This check makes
4342 no sense for code hoisting. */
4344 static void
4345 insert_insn_end_bb (expr, bb, pre)
4346 struct expr *expr;
4347 int bb;
4348 int pre;
4350 rtx insn = BLOCK_END (bb);
4351 rtx new_insn;
4352 rtx reg = expr->reaching_reg;
4353 int regno = REGNO (reg);
4354 rtx pat;
4355 int i;
4357 pat = process_insert_insn (expr);
4359 /* If the last insn is a jump, insert EXPR in front [taking care to
4360 handle cc0, etc. properly]. */
4362 if (GET_CODE (insn) == JUMP_INSN)
4364 #ifdef HAVE_cc0
4365 rtx note;
4366 #endif
4368 /* If this is a jump table, then we can't insert stuff here. Since
4369 we know the previous real insn must be the tablejump, we insert
4370 the new instruction just before the tablejump. */
4371 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4372 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4373 insn = prev_real_insn (insn);
4375 #ifdef HAVE_cc0
4376 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4377 if cc0 isn't set. */
4378 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4379 if (note)
4380 insn = XEXP (note, 0);
4381 else
4383 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4384 if (maybe_cc0_setter
4385 && INSN_P (maybe_cc0_setter)
4386 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4387 insn = maybe_cc0_setter;
4389 #endif
4390 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4391 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4394 /* Likewise if the last insn is a call, as will happen in the presence
4395 of exception handling. */
4396 else if (GET_CODE (insn) == CALL_INSN)
4398 HARD_REG_SET parm_regs;
4399 int nparm_regs;
4400 rtx p;
4402 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4403 we search backward and place the instructions before the first
4404 parameter is loaded. Do this for everyone for consistency and a
4405 presumption that we'll get better code elsewhere as well.
4407 It should always be the case that we can put these instructions
4408 anywhere in the basic block when performing PRE optimizations.
4409 Check this. */
4411 if (pre
4412 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4413 && !TEST_BIT (transp[bb], expr->bitmap_index))
4414 abort ();
4416 /* Since different machines initialize their parameter registers
4417 in different orders, assume nothing. Collect the set of all
4418 parameter registers. */
4419 CLEAR_HARD_REG_SET (parm_regs);
4420 nparm_regs = 0;
4421 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4422 if (GET_CODE (XEXP (p, 0)) == USE
4423 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4425 if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4426 abort ();
4428 SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4429 nparm_regs++;
4432 /* Search backward for the first set of a register in this set. */
4433 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4435 insn = PREV_INSN (insn);
4436 p = single_set (insn);
4437 if (p && GET_CODE (SET_DEST (p)) == REG
4438 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4439 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4441 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4442 nparm_regs--;
4446 /* If we found all the parameter loads, then we want to insert
4447 before the first parameter load.
4449 If we did not find all the parameter loads, then we might have
4450 stopped on the head of the block, which could be a CODE_LABEL.
4451 If we inserted before the CODE_LABEL, then we would be putting
4452 the insn in the wrong basic block. In that case, put the insn
4453 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4454 while (GET_CODE (insn) == CODE_LABEL
4455 || NOTE_INSN_BASIC_BLOCK_P (insn))
4456 insn = NEXT_INSN (insn);
4458 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4460 else
4462 new_insn = emit_insn_after (pat, insn);
4463 BLOCK_END (bb) = new_insn;
4466 /* Keep block number table up to date.
4467 Note that PAT could be a multiple insn sequence; we have to make
4468 sure that each insn in the sequence is handled. */
4469 if (GET_CODE (pat) == SEQUENCE)
4471 for (i = 0; i < XVECLEN (pat, 0); i++)
4473 rtx insn = XVECEXP (pat, 0, i);
4475 set_block_num (insn, bb);
4476 if (INSN_P (insn))
4477 add_label_notes (PATTERN (insn), new_insn);
4479 note_stores (PATTERN (insn), record_set_info, insn);
4482 else
4484 add_label_notes (SET_SRC (pat), new_insn);
4485 set_block_num (new_insn, bb);
4487 /* Keep register set table up to date. */
4488 record_one_set (regno, new_insn);
4491 gcse_create_count++;
4493 if (gcse_file)
4495 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4496 bb, INSN_UID (new_insn));
4497 fprintf (gcse_file, "copying expression %d to reg %d\n",
4498 expr->bitmap_index, regno);
4502 /* Insert partially redundant expressions on edges in the CFG to make
4503 the expressions fully redundant. */
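/* As a small worked example of what this accomplishes: in a diamond
   where block 1 branches to blocks 2 and 3, both falling into block 4,
   an expression computed in blocks 2 and 4 only is partially redundant
   in block 4. Inserting the computation on the edge 3->4 makes it
   fully redundant there, so pre_delete can replace the computation in
   block 4 with a copy from the reaching register.  */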
4505 static int
4506 pre_edge_insert (edge_list, index_map)
4507 struct edge_list *edge_list;
4508 struct expr **index_map;
4510 int e, i, j, num_edges, set_size, did_insert = 0;
4511 sbitmap *inserted;
4513 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4514 if it reaches any of the deleted expressions. */
4516 set_size = pre_insert_map[0]->size;
4517 num_edges = NUM_EDGES (edge_list);
4518 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4519 sbitmap_vector_zero (inserted, num_edges);
4521 for (e = 0; e < num_edges; e++)
4523 int indx;
4524 basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4525 int bb = pred->index;
4527 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4529 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4531 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4532 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4534 struct expr *expr = index_map[j];
4535 struct occr *occr;
4537 /* Now look at each deleted occurrence of this expression. */
4538 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4540 if (! occr->deleted_p)
4541 continue;
4543 /* Insert this expression on this edge if it would
4544 reach the deleted occurrence in BB. */
4545 if (!TEST_BIT (inserted[e], j))
4547 rtx insn;
4548 edge eg = INDEX_EDGE (edge_list, e);
4550 /* We can't insert anything on an abnormal and
4551 critical edge, so we insert the insn at the end of
4552 the previous block. There are several alternatives
4553 detailed in Morgan's book, p. 277 (sec. 10.5), for
4554 handling this situation. This one is easiest for
4555 now. */
4557 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4558 insert_insn_end_bb (index_map[j], bb, 0);
4559 else
4561 insn = process_insert_insn (index_map[j]);
4562 insert_insn_on_edge (insn, eg);
4565 if (gcse_file)
4567 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4568 bb,
4569 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4570 fprintf (gcse_file, "copy expression %d\n",
4571 expr->bitmap_index);
4574 SET_BIT (inserted[e], j);
4575 did_insert = 1;
4576 gcse_create_count++;
4583 free (inserted);
4584 return did_insert;
4587 /* Copy the result of INSN to the reaching register of EXPR. */
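/* For instance (register numbers invented): if INSN is
   (set (reg 105) (plus (reg 100) (reg 101))) and EXPR's reaching
   register is reg 107, we emit (set (reg 107) (reg 105)) immediately
   after INSN, making the value available in reg 107 on paths that lead
   to a deleted occurrence of the expression.  */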
4589 static void
4590 pre_insert_copy_insn (expr, insn)
4591 struct expr *expr;
4592 rtx insn;
4594 rtx reg = expr->reaching_reg;
4595 int regno = REGNO (reg);
4596 int indx = expr->bitmap_index;
4597 rtx set = single_set (insn);
4598 rtx new_insn;
4599 int bb = BLOCK_NUM (insn);
4601 if (!set)
4602 abort ();
4604 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4605 insn);
4607 /* Keep block number table up to date. */
4608 set_block_num (new_insn, bb);
4610 /* Keep register set table up to date. */
4611 record_one_set (regno, new_insn);
4612 if (insn == BLOCK_END (bb))
4613 BLOCK_END (bb) = new_insn;
4615 gcse_create_count++;
4617 if (gcse_file)
4618 fprintf (gcse_file,
4619 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4620 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4621 INSN_UID (insn), regno);
4624 /* Copy available expressions that reach the redundant expression
4625 to `reaching_reg'. */
4627 static void
4628 pre_insert_copies ()
4630 unsigned int i;
4631 struct expr *expr;
4632 struct occr *occr;
4633 struct occr *avail;
4635 /* For each available expression in the table, copy the result to
4636 `reaching_reg' if the expression reaches a deleted one.
4638 ??? The current algorithm is rather brute force.
4639 Need to do some profiling. */
4641 for (i = 0; i < expr_hash_table_size; i++)
4642 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4644 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4645 we don't want to insert a copy here because the expression may not
4646 really be redundant. So only insert an insn if the expression was
4647 deleted. This test also avoids further processing if the
4648 expression wasn't deleted anywhere. */
4649 if (expr->reaching_reg == NULL)
4650 continue;
4652 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4654 if (! occr->deleted_p)
4655 continue;
4657 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4659 rtx insn = avail->insn;
4661 /* No need to handle this one if handled already. */
4662 if (avail->copied_p)
4663 continue;
4665 /* Don't handle this one if it's a redundant one. */
4666 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4667 continue;
4669 /* Or if the expression doesn't reach the deleted one. */
4670 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4671 BLOCK_NUM (occr->insn)))
4672 continue;
4674 /* Copy the result of avail to reaching_reg. */
4675 pre_insert_copy_insn (expr, insn);
4676 avail->copied_p = 1;
4682 /* Delete redundant computations.
4683 Deletion is done by changing the insn to copy the `reaching_reg' of
4684 the expression into the result of the SET. It is left to later passes
4685 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4687 Returns non-zero if a change is made. */
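/* Continuing the example under pre_insert_copy_insn above: a redundant
   occurrence (set (reg 105) (plus (reg 100) (reg 101))) is "deleted" by
   rewriting it as the copy (set (reg 105) (reg 107)), reg 107 being the
   expression's reaching register.  */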
4689 static int
4690 pre_delete ()
4692 unsigned int i;
4693 int changed;
4694 struct expr *expr;
4695 struct occr *occr;
4697 changed = 0;
4698 for (i = 0; i < expr_hash_table_size; i++)
4699 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4701 int indx = expr->bitmap_index;
4703 /* We only need to search antic_occr since we require
4704 ANTLOC != 0. */
4706 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4708 rtx insn = occr->insn;
4709 rtx set;
4710 int bb = BLOCK_NUM (insn);
4712 if (TEST_BIT (pre_delete_map[bb], indx))
4714 set = single_set (insn);
4715 if (! set)
4716 abort ();
4718 /* Create a pseudo-reg to store the result of reaching
4719 expressions into. Get the mode for the new pseudo from
4720 the mode of the original destination pseudo. */
4721 if (expr->reaching_reg == NULL)
4722 expr->reaching_reg
4723 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4725 /* In theory this should never fail since we're creating
4726 a reg->reg copy.
4728 However, on the x86 some of the movXX patterns actually
4729 contain clobbers of scratch regs. This may cause the
4730 insn created by validate_change to not match any pattern
4731 and thus cause validate_change to fail. */
4732 if (validate_change (insn, &SET_SRC (set),
4733 expr->reaching_reg, 0))
4735 occr->deleted_p = 1;
4736 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4737 changed = 1;
4738 gcse_subst_count++;
4741 if (gcse_file)
4743 fprintf (gcse_file,
4744 "PRE: redundant insn %d (expression %d) in ",
4745 INSN_UID (insn), indx);
4746 fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4747 bb, REGNO (expr->reaching_reg));
4753 return changed;
4756 /* Perform GCSE optimizations using PRE.
4757 This is called by one_pre_gcse_pass after all the dataflow analysis
4758 has been done.
4760 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
4761 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4762 Compiler Design and Implementation.
4764 ??? A new pseudo reg is created to hold the reaching expression. The nice
4765 thing about the classical approach is that it would try to use an existing
4766 reg. If the register can't be adequately optimized [i.e. we introduce
4767 reload problems], one could add a pass here to propagate the new register
4768 through the block.
4770 ??? We don't handle single sets in PARALLELs because we're [currently] not
4771 able to copy the rest of the parallel when we insert copies to create full
4772 redundancies from partial redundancies. However, there's no reason why we
4773 can't handle PARALLELs in the cases where there are no partial
4774 redundancies. */
4776 static int
4777 pre_gcse ()
4779 unsigned int i;
4780 int did_insert, changed;
4781 struct expr **index_map;
4782 struct expr *expr;
4784 /* Compute a mapping from expression number (`bitmap_index') to
4785 hash table entry. */
4787 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4788 for (i = 0; i < expr_hash_table_size; i++)
4789 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4790 index_map[expr->bitmap_index] = expr;
4792 /* Reset bitmap used to track which insns are redundant. */
4793 pre_redundant_insns = sbitmap_alloc (max_cuid);
4794 sbitmap_zero (pre_redundant_insns);
4796 /* Delete the redundant insns first so that
4797 - we know what register to use for the new insns and for the other
4798 ones with reaching expressions
4799 - we know which insns are redundant when we go to create copies */
4801 changed = pre_delete ();
4803 did_insert = pre_edge_insert (edge_list, index_map);
4805 /* In other places with reaching expressions, copy the expression to the
4806 specially allocated pseudo-reg that reaches the redundant expr. */
4807 pre_insert_copies ();
4808 if (did_insert)
4810 commit_edge_insertions ();
4811 changed = 1;
4814 free (index_map);
4815 free (pre_redundant_insns);
4816 return changed;
4819 /* Top level routine to perform one PRE GCSE pass.
4821 Return non-zero if a change was made. */
4823 static int
4824 one_pre_gcse_pass (pass)
4825 int pass;
4827 int changed = 0;
4829 gcse_subst_count = 0;
4830 gcse_create_count = 0;
4832 alloc_expr_hash_table (max_cuid);
4833 add_noreturn_fake_exit_edges ();
4834 compute_expr_hash_table ();
4835 if (gcse_file)
4836 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4837 expr_hash_table_size, n_exprs);
4839 if (n_exprs > 0)
4841 alloc_pre_mem (n_basic_blocks, n_exprs);
4842 compute_pre_data ();
4843 changed |= pre_gcse ();
4844 free_edge_list (edge_list);
4845 free_pre_mem ();
4848 remove_fake_edges ();
4849 free_expr_hash_table ();
4851 if (gcse_file)
4853 fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4854 current_function_name, pass, bytes_used);
4855 fprintf (gcse_file, "%d substs, %d insns created\n",
4856 gcse_subst_count, gcse_create_count);
4859 return changed;
4862 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4863 If notes are added to an insn which references a CODE_LABEL, the
4864 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
4865 because the following loop optimization pass requires them. */
4867 /* ??? This is very similar to the loop.c add_label_notes function. We
4868 could probably share code here. */
4870 /* ??? If there was a jump optimization pass after gcse and before loop,
4871 then we would not need to do this here, because jump would add the
4872 necessary REG_LABEL notes. */
4874 static void
4875 add_label_notes (x, insn)
4876 rtx x;
4877 rtx insn;
4879 enum rtx_code code = GET_CODE (x);
4880 int i, j;
4881 const char *fmt;
4883 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4885 /* This code used to ignore labels that referred to dispatch tables to
4886 avoid flow generating (slightly) worse code.
4888 We no longer ignore such label references (see LABEL_REF handling in
4889 mark_jump_label for additional information). */
4891 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4892 REG_NOTES (insn));
4893 if (LABEL_P (XEXP (x, 0)))
4894 LABEL_NUSES (XEXP (x, 0))++;
4895 return;
4898 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4900 if (fmt[i] == 'e')
4901 add_label_notes (XEXP (x, i), insn);
4902 else if (fmt[i] == 'E')
4903 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4904 add_label_notes (XVECEXP (x, i, j), insn);
4908 /* Compute transparent outgoing information for each block.
4910 An expression is transparent to an edge unless it is killed by
4911 the edge itself. This can only happen with abnormal control flow,
4912 when the edge is traversed through a call. This happens with
4913 non-local labels and exceptions.
4915 This would not be necessary if we split the edge. While this is
4916 normally impossible for abnormal critical edges, with some effort
4917 it should be possible with exception handling, since we still have
4918 control over which handler should be invoked. But due to increased
4919 EH table sizes, this may not be worthwhile. */
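/* For example, (mem (reg 100)) cannot be considered transparent at the
   end of a block that ends in a call, since the callee might store
   through the pointer. Loads from the constant pool are the exception;
   nothing can store into the constant pool.  */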
4921 static void
4922 compute_transpout ()
4924 int bb;
4925 unsigned int i;
4926 struct expr *expr;
4928 sbitmap_vector_ones (transpout, n_basic_blocks);
4930 for (bb = 0; bb < n_basic_blocks; ++bb)
4932 /* Note that flow inserted a nop at the end of basic blocks that
4933 end in call instructions for reasons other than abnormal
4934 control flow. */
4935 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4936 continue;
4938 for (i = 0; i < expr_hash_table_size; i++)
4939 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4940 if (GET_CODE (expr->expr) == MEM)
4942 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4943 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4944 continue;
4946 /* ??? Optimally, we would use interprocedural alias
4947 analysis to determine if this mem is actually killed
4948 by this call. */
4949 RESET_BIT (transpout[bb], expr->bitmap_index);
4954 /* Removal of useless null pointer checks */
4956 /* Called via note_stores. X is set by SETTER. If X is a register we must
4957 invalidate nonnull_local and set nonnull_killed. DATA is really a
4958 `null_pointer_info *'.
4960 We ignore hard registers. */
4962 static void
4963 invalidate_nonnull_info (x, setter, data)
4964 rtx x;
4965 rtx setter ATTRIBUTE_UNUSED;
4966 void *data;
4968 unsigned int regno;
4969 struct null_pointer_info *npi = (struct null_pointer_info *) data;
4971 while (GET_CODE (x) == SUBREG)
4972 x = SUBREG_REG (x);
4974 /* Ignore anything that is not a register or is a hard register. */
4975 if (GET_CODE (x) != REG
4976 || REGNO (x) < npi->min_reg
4977 || REGNO (x) >= npi->max_reg)
4978 return;
4980 regno = REGNO (x) - npi->min_reg;
4982 RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4983 SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4986 /* Do null-pointer check elimination for the registers indicated in
4987 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4988 they are not our responsibility to free. */
4990 static void
4991 delete_null_pointer_checks_1 (delete_list, block_reg, nonnull_avin,
4992 nonnull_avout, npi)
4993 varray_type *delete_list;
4994 unsigned int *block_reg;
4995 sbitmap *nonnull_avin;
4996 sbitmap *nonnull_avout;
4997 struct null_pointer_info *npi;
4999 int bb;
5000 int current_block;
5001 sbitmap *nonnull_local = npi->nonnull_local;
5002 sbitmap *nonnull_killed = npi->nonnull_killed;
5004 /* Compute local properties, nonnull and killed. A register will have
5005 the nonnull property if at the end of the current block its value is
5006 known to be nonnull. The killed property indicates that somewhere in
5007 the block any information we had about the register is killed.
5009 Note that a register can have both properties in a single block. That
5010 indicates that it's killed, then later in the block a new value is
5011 computed. */
5012 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5013 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5015 for (current_block = 0; current_block < n_basic_blocks; current_block++)
5017 rtx insn, stop_insn;
5019 /* Set the current block for invalidate_nonnull_info. */
5020 npi->current_block = current_block;
5022 /* Scan each insn in the basic block looking for memory references and
5023 register sets. */
5024 stop_insn = NEXT_INSN (BLOCK_END (current_block));
5025 for (insn = BLOCK_HEAD (current_block);
5026 insn != stop_insn;
5027 insn = NEXT_INSN (insn))
5029 rtx set;
5030 rtx reg;
5032 /* Ignore anything that is not a normal insn. */
5033 if (! INSN_P (insn))
5034 continue;
5036 /* Basically ignore anything that is not a simple SET. We do have
5037 to make sure to invalidate nonnull_local and set nonnull_killed
5038 for such insns though. */
5039 set = single_set (insn);
5040 if (!set)
5042 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5043 continue;
5046 /* See if we've got a usable memory load. We handle it first
5047 in case it uses its address register as a dest (which kills
5048 the nonnull property). */
5049 if (GET_CODE (SET_SRC (set)) == MEM
5050 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5051 && REGNO (reg) >= npi->min_reg
5052 && REGNO (reg) < npi->max_reg)
5053 SET_BIT (nonnull_local[current_block],
5054 REGNO (reg) - npi->min_reg);
5056 /* Now invalidate stuff clobbered by this insn. */
5057 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5059 /* And handle stores. We do these last since any sets in INSN
5060 cannot kill the nonnull property if it is derived from a MEM
5061 appearing in a SET_DEST. */
5062 if (GET_CODE (SET_DEST (set)) == MEM
5063 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5064 && REGNO (reg) >= npi->min_reg
5065 && REGNO (reg) < npi->max_reg)
5066 SET_BIT (nonnull_local[current_block],
5067 REGNO (reg) - npi->min_reg);
5071 /* Now compute global properties based on the local properties. This
5072 is a classic global availability algorithm. */
5073 compute_available (nonnull_local, nonnull_killed,
5074 nonnull_avout, nonnull_avin);
5076 /* Now look at each bb and see if it ends with a compare of a value
5077 against zero. */
5078 for (bb = 0; bb < n_basic_blocks; bb++)
5080 rtx last_insn = BLOCK_END (bb);
5081 rtx condition, earliest;
5082 int compare_and_branch;
5084 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5085 since BLOCK_REG[BB] is zero if this block did not end with a
5086 comparison against zero, this condition works. */
5087 if (block_reg[bb] < npi->min_reg
5088 || block_reg[bb] >= npi->max_reg)
5089 continue;
5091 /* LAST_INSN is a conditional jump. Get its condition. */
5092 condition = get_condition (last_insn, &earliest);
5094 /* If we can't determine the condition then skip. */
5095 if (! condition)
5096 continue;
5098 /* Is the register known to have a nonzero value? */
5099 if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5100 continue;
5102 /* Try to compute whether the compare/branch at the block end is one or
5103 two instructions. */
5104 if (earliest == last_insn)
5105 compare_and_branch = 1;
5106 else if (earliest == prev_nonnote_insn (last_insn))
5107 compare_and_branch = 2;
5108 else
5109 continue;
5111 /* We know the register in this comparison is nonnull at exit from
5112 this block. We can optimize this comparison. */
5113 if (GET_CODE (condition) == NE)
5115 rtx new_jump;
5117 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5118 last_insn);
5119 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5120 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5121 emit_barrier_after (new_jump);
5123 if (!*delete_list)
5124 VARRAY_RTX_INIT (*delete_list, 10, "delete_list");
5126 VARRAY_PUSH_RTX (*delete_list, last_insn);
5127 if (compare_and_branch == 2)
5128 VARRAY_PUSH_RTX (*delete_list, earliest);
5130 /* Don't check this block again. (Note that BLOCK_END is
5131 invalid here; we deleted the last instruction in the
5132 block.) */
5133 block_reg[bb] = 0;
5137 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5138 at compile time.
5140 This is conceptually similar to global constant/copy propagation and
5141 classic global CSE (it even uses the same dataflow equations as cprop).
5143 If a register is used as a memory address with the form (mem (reg)), then we
5144 know that REG cannot be zero at that point in the program. Any instruction
5145 which sets REG "kills" this property.
5147 So, if every path leading to a conditional branch has an available memory
5148 reference of that form, then we know the register cannot have the value
5149 zero at the conditional branch.
5151 So we merely need to compute the local properties and propagate that data
5152 around the cfg, then optimize where possible.
5154 We run this pass two times. Once before CSE, then again after CSE. This
5155 has proven to be the most profitable approach. It is rare for new
5156 optimization opportunities of this nature to appear after the first CSE
5157 pass.
5159 This could probably be integrated with global cprop with a little work. */
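/* At the source level the transformation is roughly (a hypothetical
   sketch):

       x = *p;           the load makes p provably nonnull ...
       ...               ... as long as nothing assigns to p
       if (p != 0) ...   always true: the branch becomes an
                         unconditional jump
       if (p == 0) ...   always false: the compare/branch is deleted

   subject to the availability computation over the whole CFG.  */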
5161 void
5162 delete_null_pointer_checks (f)
5163 rtx f ATTRIBUTE_UNUSED;
5165 sbitmap *nonnull_avin, *nonnull_avout;
5166 unsigned int *block_reg;
5167 varray_type delete_list = NULL;
5168 int bb;
5169 int reg;
5170 int regs_per_pass;
5171 int max_reg;
5172 unsigned int i;
5173 struct null_pointer_info npi;
5175 /* If we have only a single block, then there's nothing to do. */
5176 if (n_basic_blocks <= 1)
5177 return;
5179 /* Trying to perform global optimizations on flow graphs which have
5180 a high connectivity will take a long time and is unlikely to be
5181 particularly useful.
5183 In normal circumstances a cfg should have about twice as many edges
5184 as blocks. But we do not want to punish small functions which have
5185 a couple switch statements. So we require a relatively large number
5186 of basic blocks and the ratio of edges to blocks to be high. */
5187 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5188 return;
5190 /* We need four bitmaps, each with a bit for each register in each
5191 basic block. */
5192 max_reg = max_reg_num ();
5193 regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5195 /* Allocate bitmaps to hold local and global properties. */
5196 npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5197 npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5198 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5199 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5201 /* Go through the basic blocks, seeing whether or not each block
5202 ends with a conditional branch whose condition is a comparison
5203 against zero. Record the register compared in BLOCK_REG. */
5204 block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (unsigned int));
5205 for (bb = 0; bb < n_basic_blocks; bb++)
5207 rtx last_insn = BLOCK_END (bb);
5208 rtx condition, earliest, reg;
5210 /* We only want conditional branches. */
5211 if (GET_CODE (last_insn) != JUMP_INSN
5212 || !any_condjump_p (last_insn)
5213 || !onlyjump_p (last_insn))
5214 continue;
5216 /* LAST_INSN is a conditional jump. Get its condition. */
5217 condition = get_condition (last_insn, &earliest);
5219 /* If we were unable to get the condition, or it is not an equality
5220 comparison against zero, then there's nothing we can do. */
5221 if (!condition
5222 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5223 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5224 || (XEXP (condition, 1)
5225 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5226 continue;
5228 /* We must be checking a register against zero. */
5229 reg = XEXP (condition, 0);
5230 if (GET_CODE (reg) != REG)
5231 continue;
5233 block_reg[bb] = REGNO (reg);
5236 /* Go through the algorithm for each block of registers. */
5237 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5239 npi.min_reg = reg;
5240 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5241 delete_null_pointer_checks_1 (&delete_list, block_reg, nonnull_avin,
5242 nonnull_avout, &npi);
5245 /* Now delete the instructions all at once. This breaks the CFG. */
5246 if (delete_list)
5248 for (i = 0; i < VARRAY_ACTIVE_SIZE (delete_list); i++)
5249 delete_insn (VARRAY_RTX (delete_list, i));
5250 VARRAY_FREE (delete_list);
5253 /* Free the table of registers compared at the end of every block. */
5254 free (block_reg);
5256 /* Free bitmaps. */
5257 free (npi.nonnull_local);
5258 free (npi.nonnull_killed);
5259 free (nonnull_avin);
5260 free (nonnull_avout);
5263 /* Code Hoisting variables and subroutines. */
5265 /* Very busy expressions. */
5266 static sbitmap *hoist_vbein;
5267 static sbitmap *hoist_vbeout;
5269 /* Hoistable expressions. */
5270 static sbitmap *hoist_exprs;
5272 /* Dominator bitmaps. */
5273 static sbitmap *dominators;
5275 /* ??? We could compute post dominators and run this algorithm in
5276 reverse to perform tail merging; doing so would probably be
5277 more effective than the tail merging code in jump.c.
5279 It's unclear if tail merging could be run in parallel with
5280 code hoisting. It would be nice. */
5282 /* Allocate vars used for code hoisting analysis. */
5284 static void
5285 alloc_code_hoist_mem (n_blocks, n_exprs)
5286 int n_blocks, n_exprs;
5288 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5289 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5290 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5292 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5293 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5294 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5295 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5297 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5300 /* Free vars used for code hoisting analysis. */
5302 static void
5303 free_code_hoist_mem ()
5305 free (antloc);
5306 free (transp);
5307 free (comp);
5309 free (hoist_vbein);
5310 free (hoist_vbeout);
5311 free (hoist_exprs);
5312 free (transpout);
5314 free (dominators);
5317 /* Compute the very busy expressions at entry/exit from each block.
5319 An expression is very busy if all paths from a given point
5320 compute the expression. */
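/* The computation below iterates the backward dataflow equations

       VBEIN[bb]  = ANTLOC[bb] | (VBEOUT[bb] & TRANSP[bb])
       VBEOUT[bb] = intersection of VBEIN over the successors of bb

   to a fixed point, with VBEOUT of the last block left empty.  */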
5322 static void
5323 compute_code_hoist_vbeinout ()
5325 int bb, changed, passes;
5327 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5328 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5330 passes = 0;
5331 changed = 1;
5333 while (changed)
5335 changed = 0;
5337 /* We scan the blocks in reverse order to speed up
5338 the convergence. */
5339 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5341 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5342 hoist_vbeout[bb], transp[bb]);
5343 if (bb != n_basic_blocks - 1)
5344 sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5347 passes++;
5350 if (gcse_file)
5351 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5354 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5356 static void
5357 compute_code_hoist_data ()
5359 compute_local_properties (transp, comp, antloc, 0);
5360 compute_transpout ();
5361 compute_code_hoist_vbeinout ();
5362 calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
5363 if (gcse_file)
5364 fprintf (gcse_file, "\n");
5367 /* Determine if the expression identified by EXPR_INDEX would
5368 reach BB unimpaired if it was placed at the end of EXPR_BB.
5370 It's unclear exactly what Muchnick meant by "unimpaired". It seems
5371 to me that the expression must either be computed or transparent in
5372 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5373 would allow the expression to be hoisted out of loops, even if
5374 the expression wasn't a loop invariant.
5376 Contrast this to reachability for PRE where an expression is
5377 considered reachable if *any* path reaches instead of *all*
5378 paths. */
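/* For example, if EXPR_BB dominates BB but some path from EXPR_BB to BB
   passes through a block that kills the expression, the "every path"
   requirement fails and we do not hoist; under PRE's "any path" notion
   the expression could still be considered to reach BB.  */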
5380 static int
5381 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5382 int expr_bb;
5383 int expr_index;
5384 int bb;
5385 char *visited;
5387 edge pred;
5388 int visited_allocated_locally = 0;
5391 if (visited == NULL)
5393 visited_allocated_locally = 1;
5394 visited = xcalloc (n_basic_blocks, 1);
5397 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5399 int pred_bb = pred->src->index;
5401 if (pred->src == ENTRY_BLOCK_PTR)
5402 break;
5403 else if (visited[pred_bb])
5404 continue;
5406 /* Does this predecessor generate this expression? */
5407 else if (TEST_BIT (comp[pred_bb], expr_index))
5408 break;
5409 else if (! TEST_BIT (transp[pred_bb], expr_index))
5410 break;
5412 /* Not killed. */
5413 else
5415 visited[pred_bb] = 1;
5416 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5417 pred_bb, visited))
5418 break;
5421 if (visited_allocated_locally)
5422 free (visited);
5424 return (pred == NULL);
5427 /* Actually perform code hoisting. */
5429 static void
5430 hoist_code ()
5432 int bb, dominated;
5433 unsigned int i;
5434 struct expr **index_map;
5435 struct expr *expr;
5437 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5439 /* Compute a mapping from expression number (`bitmap_index') to
5440 hash table entry. */
5442 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5443 for (i = 0; i < expr_hash_table_size; i++)
5444 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5445 index_map[expr->bitmap_index] = expr;
5447 /* Walk over each basic block looking for potentially hoistable
5448 expressions; nothing gets hoisted from the entry block. */
5449 for (bb = 0; bb < n_basic_blocks; bb++)
5451 int found = 0;
5452 int insn_inserted_p;
5454 /* Examine each expression that is very busy at the exit of this
5455 block. These are the potentially hoistable expressions. */
5456 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5458 int hoistable = 0;
5460 if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5462 /* We've found a potentially hoistable expression, now
5463 we look at every block BB dominates to see if it
5464 computes the expression. */
5465 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5467 /* Ignore self dominance. */
5468 if (bb == dominated
5469 || ! TEST_BIT (dominators[dominated], bb))
5470 continue;
5472 /* We've found a dominated block, now see if it computes
5473 the busy expression and whether or not moving that
5474 expression to the "beginning" of that block is safe. */
5475 if (!TEST_BIT (antloc[dominated], i))
5476 continue;
5478 /* Note if the expression would reach the dominated block
5479 unimpaired if it was placed at the end of BB.
5481 Keep track of how many times this expression is hoistable
5482 from a dominated block into BB. */
5483 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5484 hoistable++;
5487 /* If we found more than one hoistable occurrence of this
5488 expression, then note it in the bitmap of expressions to
5489 hoist. It makes no sense to hoist things which are computed
5490 in only one BB, and doing so tends to pessimize register
5491 allocation. One could increase this value to try harder
5492 to avoid any possible code expansion due to register
5493 allocation issues; however experiments have shown that
5494 the vast majority of hoistable expressions are only movable
5495 from two successors, so raising this threshold is likely
5496 to nullify any benefit we get from code hoisting. */
5497 if (hoistable > 1)
5499 SET_BIT (hoist_exprs[bb], i);
5500 found = 1;
5505 /* If we found nothing to hoist, then quit now. */
5506 if (! found)
5507 continue;
5509 /* Loop over all the hoistable expressions. */
5510 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5512 /* We want to insert the expression into BB only once, so
5513 note when we've inserted it. */
5514 insn_inserted_p = 0;
5516 /* These tests should be the same as the tests above. */
5517 if (TEST_BIT (hoist_vbeout[bb], i))
5519 /* We've found a potentially hoistable expression, now
5520 we look at every block BB dominates to see if it
5521 computes the expression. */
5522 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5524 /* Ignore self dominance. */
5525 if (bb == dominated
5526 || ! TEST_BIT (dominators[dominated], bb))
5527 continue;
5529 /* We've found a dominated block, now see if it computes
5530 the busy expression and whether or not moving that
5531 expression to the "beginning" of that block is safe. */
5532 if (!TEST_BIT (antloc[dominated], i))
5533 continue;
5535 /* The expression is computed in the dominated block and
5536 it would be safe to compute it at the start of the
5537 dominated block. Now we have to determine if the
5538 expression would reach the dominated block if it was
5539 placed at the end of BB. */
5540 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5542 struct expr *expr = index_map[i];
5543 struct occr *occr = expr->antic_occr;
5544 rtx insn;
5545 rtx set;
5547 /* Find the right occurrence of this expression. */
5548 while (occr && BLOCK_NUM (occr->insn) != dominated)
5549 occr = occr->next;
5551 /* Should never happen. */
5552 if (!occr)
5553 abort ();
5555 insn = occr->insn;
5557 set = single_set (insn);
5558 if (! set)
5559 abort ();
5561 /* Create a pseudo-reg to store the result of reaching
5562 expressions into. Get the mode for the new pseudo
5563 from the mode of the original destination pseudo. */
5564 if (expr->reaching_reg == NULL)
5565 expr->reaching_reg
5566 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5568 /* In theory this should never fail since we're creating
5569 a reg->reg copy.
5571 However, on the x86 some of the movXX patterns
5572 actually contain clobbers of scratch regs. This may
5573 cause the insn created by validate_change to not
5574 match any pattern and thus cause validate_change to
5575 fail. */
5576 if (validate_change (insn, &SET_SRC (set),
5577 expr->reaching_reg, 0))
5579 occr->deleted_p = 1;
5580 if (!insn_inserted_p)
5582 insert_insn_end_bb (index_map[i], bb, 0);
5583 insn_inserted_p = 1;
5592 free (index_map);
5595 /* Top level routine to perform one code hoisting (aka unification) pass.
5597 Return non-zero if a change was made. */
5599 static int
5600 one_code_hoisting_pass ()
5602 int changed = 0;
5604 alloc_expr_hash_table (max_cuid);
5605 compute_expr_hash_table ();
5606 if (gcse_file)
5607 dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
5608 expr_hash_table_size, n_exprs);
5610 if (n_exprs > 0)
5612 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5613 compute_code_hoist_data ();
5614 hoist_code ();
5615 free_code_hoist_mem ();
5618 free_expr_hash_table ();
5620 return changed;