Remove compile time warnings when building arm.o
[official-gcc.git] / gcc / gcse.c
/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - dead store elimination
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass
*/
/* References searched while implementing this.

   Compilers: Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region? Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Control Flow
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The Power of Assignment Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global Code Motion / Global Value Numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/
#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"

#include "obstack.h"
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free

/* Maximum number of passes to perform.  */
#define MAX_PASSES 1

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1
/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).
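   For illustration (a sketch, not an excerpt from the pass; register
   numbers are hypothetical): an expression that is invariant in a loop
   is partially redundant along the loop's back edge, so PRE ends up
   placing the computation ahead of the loop.

     before PRE:
       loop: (set (reg 100) (plus (reg 60) (reg 61)))
             ...
             conditional jump back to loop

     after PRE:
             (set (reg 200) (plus (reg 60) (reg 61)))
       loop: (set (reg 100) (reg 200))
             ...
             conditional jump back to loop

   On the first iteration the value is computed before the loop; on every
   later iteration it arrives around the back edge, so the in-loop add
   was partially redundant and is deleted.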
   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  The macro MAX_PASSES
   can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.
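   A minimal worked sketch of steps 3-5 (hypothetical pseudo registers;
   not an excerpt from the pass): suppose blocks B1 and B2 both jump to
   B3, and

     B1: (set (reg 100) (plus (reg 60) (reg 61)))
     B2: no computation of the sum
     B3: (set (reg 101) (plus (reg 60) (reg 61)))

   B3's computation is partially redundant.  Deletion replaces it with
   (set (reg 101) (reg 200)) for a newly created pseudo 200; insertion
   then places (set (reg 200) (plus (reg 60) (reg 61))) on the edge from
   B2, and a copy (set (reg 200) (reg 100)) after B1's computation, so
   that (reg 200) holds the value on every path into B3.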
   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much, since it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   A fair bit of simplicity is created by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle things can
   be rearranged.

   Help stamp out big monolithic functions!  */
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;

struct reg_use {rtx reg_rtx; };
/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};
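/* For illustration (hypothetical register numbers; not an excerpt from
   the pass), in a block containing

     insn 1: (set (reg 70) (plus (reg 60) (reg 61)))
     insn 2: (set (reg 60) (const_int 5))
     insn 3: (set (reg 71) (plus (reg 60) (reg 61)))

   insn 1 is the anticipatable occurrence of the sum (first in the block,
   operands unmodified before it) and insn 3 is the available occurrence
   (last in the block, operands unmodified after it).  Insn 2 prevents
   insn 1 from being available and insn 3 from being anticipatable.  */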
/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static int set_hash_table_size;

/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;

/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])
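/* Example of the uid/cuid mapping (uids are hypothetical): if the insn
   stream is

     uid 18 (note), uid 19 (insn), uid 23 (insn)

   then alloc_gcse_mem below assigns uid_cuid[19] == 0 and
   uid_cuid[23] == 1, with the note sharing the cuid of the next real
   insn, and cuid_insn[0] and cuid_insn[1] point back at the two real
   insns.  Cuids thus give a dense, monotonic numbering of the real
   insns, making "is insn A before insn B" a simple integer compare.  */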
/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;

/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.

   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
   advantage of recording things this way is that `reg_set_table' is fairly
   sparse with respect to pseudo regs but for hard regs could be fairly dense
   [relatively speaking].  And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */
typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;

static reg_set **reg_set_table;

/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* For each block, non-zero if memory is set in that block.
   This is computed during hash table computation and is used by
   expr_killed_p and compute_transp.
   ??? Handling of memory is very simple, we don't make any attempt
   to optimize things (later).
   ??? This can be computed by compute_sets since the information
   doesn't change.  */
static char *mem_set_in_block;
/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;
/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* A bitmap of all ones for implementing the algorithm for available
   expressions and reaching definitions.  */
/* ??? Available expression bitmaps have a different size than reaching
   definition bitmaps.  This should be the larger of the two, however, it
   is not currently used for reaching definitions.  */
static sbitmap u_bitmap;

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.
   rd_kill[block_num][cuid_num]
   ae_kill[block_num][expr_num]  */

/* For reaching defs.  */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* For available exprs.  */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info
{
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  unsigned int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  unsigned int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
static void compute_can_copy PARAMS ((void));
static char *gmalloc PARAMS ((unsigned int));
static char *grealloc PARAMS ((char *, unsigned int));
static char *gcse_alloc PARAMS ((unsigned long));
static void alloc_gcse_mem PARAMS ((rtx));
static void free_gcse_mem PARAMS ((void));
static void alloc_reg_set_mem PARAMS ((int));
static void free_reg_set_mem PARAMS ((void));
static int get_bitmap_width PARAMS ((int, int, int));
static void record_one_set PARAMS ((int, rtx));
static void record_set_info PARAMS ((rtx, rtx, void *));
static void compute_sets PARAMS ((rtx));
static void hash_scan_insn PARAMS ((rtx, int, int));
static void hash_scan_set PARAMS ((rtx, rtx, int));
static void hash_scan_clobber PARAMS ((rtx, rtx));
static void hash_scan_call PARAMS ((rtx, rtx));
static int want_to_gcse_p PARAMS ((rtx));
static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p PARAMS ((rtx, rtx));
static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
					  int, int));
static void insert_set_in_table PARAMS ((rtx, rtx));
static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_set PARAMS ((int, int));
static int expr_equiv_p PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((unsigned int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
				     int, int));
static struct expr *lookup_expr PARAMS ((rtx));
static struct expr *lookup_set PARAMS ((unsigned int, rtx));
static struct expr *next_set PARAMS ((unsigned int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p PARAMS ((rtx, rtx));
static void mark_call PARAMS ((rtx));
static void mark_set PARAMS ((rtx, rtx));
static void mark_clobber PARAMS ((rtx, rtx));
static void mark_oprs_set PARAMS ((rtx));
static void alloc_cprop_mem PARAMS ((int, int));
static void free_cprop_mem PARAMS ((void));
static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
					      int));
static void compute_cprop_data PARAMS ((void));
static void find_used_regs PARAMS ((rtx));
static int try_replace_reg PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump PARAMS ((rtx, rtx, struct reg_use *, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((rtx, struct reg_use *, rtx));
#endif
static int cprop_insn PARAMS ((rtx, int));
static int cprop PARAMS ((int));
static int one_cprop_pass PARAMS ((int, int));
static void alloc_pre_mem PARAMS ((int, int));
static void free_pre_mem PARAMS ((void));
static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
static void insert_insn_end_bb PARAMS ((struct expr *, int, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies PARAMS ((void));
static int pre_delete PARAMS ((void));
static int pre_gcse PARAMS ((void));
static int one_pre_gcse_pass PARAMS ((int));
static void add_label_notes PARAMS ((rtx, rtx));
static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem PARAMS ((void));
static void compute_code_hoist_vbeinout PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
static void hoist_code PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));
static void alloc_rd_mem PARAMS ((int, int));
static void free_rd_mem PARAMS ((void));
static void handle_rd_kill_set PARAMS ((rtx, int, int));
static void compute_kill_rd PARAMS ((void));
static void compute_rd PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem PARAMS ((void));
static void compute_ae_gen PARAMS ((void));
static int expr_killed_p PARAMS ((rtx, int));
static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
					int, int));
static rtx computing_insn PARAMS ((struct expr *, rtx));
static int def_reaches_here_p PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr PARAMS ((rtx, struct expr *));
static int classic_gcse PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
						  sbitmap *,
						  struct null_pointer_info *));
static rtx process_insert_insn PARAMS ((struct expr *));
static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
					     int, int, char *));
static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
						 int, char *));
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (file)
    dump_flow_info (file);

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return 0;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    return 0;

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_PASSES)
    {
      changed = 0;
      if (file)
        fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
         and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
         during this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
        changed |= one_classic_gcse_pass (pass + 1);
      else
        {
          changed |= one_pre_gcse_pass (pass + 1);
          free_reg_set_mem ();
          alloc_reg_set_mem (max_reg_num ());
          compute_sets (f);
          run_jump_opt_after_gcse = 1;
        }

      if (max_pass_bytes < bytes_used)
        max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
         not re-use the existing allocated memory because the tables
         will not have info for the insns or registers created by
         partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
         for code size -- it rarely makes programs faster, and can make
         them bigger if we did partial redundancy elimination (when optimizing
         for space, we use a classic gcse algorithm instead of partial
         redundancy algorithms).  */
      if (optimize_size)
        {
          max_gcse_regno = max_reg_num ();
          alloc_gcse_mem (f);
          changed |= one_code_hoisting_pass ();
          free_gcse_mem ();

          if (max_pass_bytes < bytes_used)
            max_pass_bytes = bytes_used;
        }

      if (file)
        {
          fprintf (file, "\n");
          fflush (file);
        }

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);
  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
               current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
               pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  obstack_free (&gcse_obstack, NULL_PTR);
  free_reg_set_mem ();
  return run_jump_opt_after_gcse;
}
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  char *free_point = (char *) oballoc (1);

  bzero (can_copy_p, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
        can_copy_p[i] = 0;
#else
        reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
        insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
        if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
          can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();

  /* Free the objects we just allocated.  */
  obfree (free_point);
}
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}
/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  bzero ((char *) uid_cuid, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
        INSN_CUID (insn) = i++;
      else
        INSN_CUID (insn) = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  bzero ((char *) cuid_insn, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
      CUID_INSN (i++) = insn;

  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
                                                       max_gcse_regno);
  mem_set_in_block = (char *) gmalloc (n_basic_blocks);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  free (reg_set_in_block);
  free (mem_set_in_block);
}
/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values across the
   various blocks are combined (by following flow graph edges) to arrive at
   global values.  Conceptually, each set of equations is independent.  We
   may therefore solve all the equations in parallel, solve them one at a
   time, or pick any intermediate approach.

   When you're going to need N two-dimensional bitmaps, each X (say, the
   number of blocks) by Y (say, the number of expressions), call this
   function.  It's not important what X and Y represent; only that Y
   correspond to the things that can be done in parallel.  This function will
   return an appropriate chunking factor C; you should solve C sets of
   equations in parallel.  By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
                             / column_size);
}
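/* A worked example under assumed sizes (illustrative only): with n == 2,
   x == 1000 blocks, y == 100000 expressions, an 8-byte SBITMAP_ELT_TYPE
   and SBITMAP_ELT_BITS == 64, column_size is 2 * 1000 * 8 == 16000
   bytes.  Solving everything in parallel would need
   16000 * SBITMAP_SET_SIZE (100000) bytes, about 25MB, which is over
   the 10MB cap, so the fallback returns
   64 * ((10485760 + 15999) / 16000) == 64 * 656 == 41984.  The caller
   would then solve the 100000 equations in three chunks of at most
   41984 each.  */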
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   SETP controls which hash table to look at.  If zero, this routine looks at
   the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */

static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
        sbitmap_vector_zero (transp, n_basic_blocks);
      else
        sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
        {
          int indx = expr->bitmap_index;
          struct occr *occr;

          /* The expression is transparent in this block if it is not killed.
             We start by assuming all are transparent [none are killed], and
             then reset the bits for those that are.  */
          if (transp)
            compute_transp (expr->expr, indx, transp, setp);

          /* The occurrences recorded in antic_occr are exactly those that
             we want to set to non-zero in ANTLOC.  */
          if (antloc)
            for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->deleted_p = 0;
              }

          /* The occurrences recorded in avail_occr are exactly those that
             we want to set to non-zero in COMP.  */
          if (comp)
            for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->copied_p = 0;
              }

          /* While we're scanning the table, this is a good place to
             initialize this.  */
          expr->reaching_reg = 0;
        }
    }
}
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  unsigned int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  bzero ((char *) reg_set_table, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL_PTR);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;

      reg_set_table
        = (struct reg_set **) grealloc ((char *) reg_set_table,
                                        new_size * sizeof (struct reg_set *));
      bzero ((char *) (reg_set_table + reg_set_table_size),
             (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
                                                   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = NULL;
  if (reg_set_table[regno] == NULL)
    reg_set_table[regno] = new_reg_info;
  else
    {
      reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
      /* ??? One could keep a "last" pointer to speed this up.  */
      while (reg_info_ptr1 != NULL)
        {
          reg_info_ptr2 = reg_info_ptr1;
          reg_info_ptr1 = reg_info_ptr1->next;
        }

      reg_info_ptr2->next = new_reg_info;
    }
}

/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.  See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

  for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
      note_stores (PATTERN (insn), record_set_info, insn);
}
/* Hash table support.  */

/* For each register, the cuid of the first/last insn in the block to set it,
   or -1 if not set.  */
#define NEVER_SET -1
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or -1 if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.

   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;
/* Perform a quick check whether X, the source of a set, is something
   we want to consider for GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  return 1;
}
/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
        return (reg_last_set[REGNO (x)] == NEVER_SET
                || reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
        return (reg_first_set[REGNO (x)] == NEVER_SET
                || reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (avail_p && mem_last_set != NEVER_SET
          && mem_last_set >= INSN_CUID (insn))
        return 0;
      else if (! avail_p && mem_first_set != NEVER_SET
               && mem_first_set < INSN_CUID (insn))
        return 0;
      else
        return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call needed at this
             level, change it into iteration.  This function is called enough
             to be worth it.  */
          if (i == 0)
            return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

          else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
            return 0;
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
            return 0;
    }

  return 1;
}
/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}

/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;

  /* Used to turn recursion into iteration.  We can't rely on GCC's
     tail-recursion elimination since we need to keep accumulating values
     in HASH.  */

  if (x == 0)
    return hash;

 repeat:
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      hash += ((unsigned int) REG << 7) + REGNO (x);
      return hash;

    case CONST_INT:
      hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
               + (unsigned int) INTVAL (x));
      return hash;

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
         the integers representing the constant.  */
      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
        for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
          hash += (unsigned int) XWINT (x, i);
      else
        hash += ((unsigned int) CONST_DOUBLE_LOW (x)
                 + (unsigned int) CONST_DOUBLE_HIGH (x));
      return hash;

      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
         differences and differences between each stage's debugging dumps.  */
      hash += (((unsigned int) LABEL_REF << 7)
               + CODE_LABEL_NUMBER (XEXP (x, 0)));
      return hash;

    case SYMBOL_REF:
      {
        /* Don't hash on the symbol's address to avoid bootstrap differences.
           Different hash values may cause expressions to be recorded in
           different orders and thus different registers to be used in the
           final assembler.  This also avoids differences in the dump files
           between various stages.  */
        unsigned int h = 0;
        const unsigned char *p = (const unsigned char *) XSTR (x, 0);

        while (*p)
          h += (h << 7) + *p++; /* ??? revisit */

        hash += ((unsigned int) SYMBOL_REF << 7) + h;
        return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }

      hash += (unsigned int) MEM;
      hash += MEM_ALIAS_SET (x);
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }

    default:
      break;
    }

  hash += (unsigned) code + (unsigned) GET_MODE (x);
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = XEXP (x, i);
              goto repeat;
            }

          hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
          if (*do_not_record_p)
            return 0;
        }

      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          {
            hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
            if (*do_not_record_p)
              return 0;
          }

      else if (fmt[i] == 's')
        {
          register const unsigned char *p =
            (const unsigned char *) XSTR (x, i);

          if (p)
            while (*p)
              hash += *p++;
        }
      else if (fmt[i] == 'i')
        hash += (unsigned int) XINT (x, i);
      else
        abort ();
    }

  return hash;
}
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This simplifies the PRE copy
   propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}
/* Return non-zero if exp1 is equivalent to exp2.
   ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */

static int
expr_equiv_p (x, y)
     rtx x, y;
{
  register int i, j;
  register enum rtx_code code;
  register const char *fmt;

  if (x == y)
    return 1;

  if (x == 0 || y == 0)
    return x == y;

  code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

    case MEM:
      /* Can't merge two expressions in different alias sets, since we can
         decide that the expression is transparent in a block when it isn't,
         due to it being set with the different alias set.  */
      if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
        return 0;
      break;

    /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
      return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
               && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
              || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
                  && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole thing.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
        {
        case 'e':
          if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
            return 0;
          break;

        case 'E':
          if (XVECLEN (x, i) != XVECLEN (y, i))
            return 0;
          for (j = 0; j < XVECLEN (x, i); j++)
            if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
              return 0;
          break;

        case 's':
          if (strcmp (XSTR (x, i), XSTR (y, i)))
            return 0;
          break;

        case 'i':
          if (XINT (x, i) != XINT (y, i))
            return 0;
          break;

        case 'w':
          if (XWINT (x, i) != XWINT (y, i))
            return 0;
          break;

        case '0':
          break;

        default:
          abort ();
        }
    }

  return 1;
}
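/* For example (illustrative, with hypothetical register numbers):
   expr_equiv_p considers (plus:SI (reg 60) (reg 61)) equivalent to
   (plus:SI (reg 61) (reg 60)) through the commutative check above, but
   not to (plus:HI (reg 60) (reg 61)), whose mode differs, and two MEMs
   in different alias sets are never merged even if their addresses
   match.  */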
/* Insert expression X in INSN in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is non-zero if X is an anticipatable expression.
   AVAIL_P is non-zero if X is an available expression.  */

static void
insert_expr_in_table (x, mode, insn, antic_p, avail_p)
     rtx x;
     enum machine_mode mode;
     rtx insn;
     int antic_p, avail_p;
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;
  struct occr *last_occr = NULL;

  hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = expr_hash_table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
         the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (expr_hash_table[hash] == NULL)
        /* This is the first pattern that hashed to this index.  */
        expr_hash_table[hash] = cur_expr;
      else
        /* Add EXPR to end of this hash chain.  */
        last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = n_exprs++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* Search for another occurrence in the same basic block.  */
      while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = antic_occr;
          antic_occr = antic_occr->next;
        }

      if (antic_occr)
        /* Found another instance of the expression in the same basic block.
           Prefer the currently recorded one.  We want the first one in the
           block and the block is scanned from start to end.  */
        ; /* nothing to do */
      else
        {
          /* First occurrence of this expression in this basic block.  */
          antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);
          /* First occurrence of this expression in any block?  */
          if (cur_expr->antic_occr == NULL)
            cur_expr->antic_occr = antic_occr;
          else
            last_occr->next = antic_occr;

          antic_occr->insn = insn;
          antic_occr->next = NULL;
        }
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      /* Search for another occurrence in the same basic block.  */
      while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = avail_occr;
          avail_occr = avail_occr->next;
        }

      if (avail_occr)
        /* Found another instance of the expression in the same basic block.
           Prefer this occurrence to the currently recorded one.  We want
           the last one in the block and the block is scanned from start
           to end.  */
        avail_occr->insn = insn;
      else
        {
          /* First occurrence of this expression in this basic block.  */
          avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);

          /* First occurrence of this expression in any block?  */
          if (cur_expr->avail_occr == NULL)
            cur_expr->avail_occr = avail_occr;
          else
            last_occr->next = avail_occr;

          avail_occr->insn = insn;
          avail_occr->next = NULL;
        }
    }
}
/* Insert pattern X in INSN in the hash table.
   X is a SET of a reg to either another reg or a constant.
   If it is already present, record it as the last occurrence in INSN's
   basic block.  */

static void
insert_set_in_table (x, insn)
     rtx x;
     rtx insn;
{
  int found;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *cur_occr, *last_occr = NULL;

  if (GET_CODE (x) != SET
      || GET_CODE (SET_DEST (x)) != REG)
    abort ();

  hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);

  cur_expr = set_hash_table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
         the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (set_hash_table[hash] == NULL)
        /* This is the first pattern that hashed to this index.  */
        set_hash_table[hash] = cur_expr;
      else
        /* Add EXPR to end of this hash chain.  */
        last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.
         We must copy X because it can be modified when copy propagation is
         performed on its operands.  */
      /* ??? Should this go in a different obstack?  */
      cur_expr->expr = copy_rtx (x);
      cur_expr->bitmap_index = n_sets++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence.  */
  cur_occr = cur_expr->avail_occr;

  /* Search for another occurrence in the same basic block.  */
  while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
    {
      /* If an occurrence isn't found, save a pointer to the end of
         the list.  */
      last_occr = cur_occr;
      cur_occr = cur_occr->next;
    }

  if (cur_occr)
    /* Found another instance of the expression in the same basic block.
       Prefer this occurrence to the currently recorded one.  We want the
       last one in the block and the block is scanned from start to end.  */
    cur_occr->insn = insn;
  else
    {
      /* First occurrence of this expression in this basic block.  */
      cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
      bytes_used += sizeof (struct occr);

      /* First occurrence of this expression in any block?  */
      if (cur_expr->avail_occr == NULL)
        cur_expr->avail_occr = cur_occr;
      else
        last_occr->next = cur_occr;

      cur_occr->insn = insn;
      cur_occr->next = NULL;
    }
}
1854 /* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
1855 non-zero, this is for the assignment hash table, otherwise it is for the
1856 expression hash table. */
1858 static void
1859 hash_scan_set (pat, insn, set_p)
1860 rtx pat, insn;
1861 int set_p;
1863 rtx src = SET_SRC (pat);
1864 rtx dest = SET_DEST (pat);
1866 if (GET_CODE (src) == CALL)
1867 hash_scan_call (src, insn);
1869 if (GET_CODE (dest) == REG)
1871 int regno = REGNO (dest);
1872 rtx tmp;
1874 /* Only record sets of pseudo-regs in the hash table. */
1875 if (! set_p
1876 && regno >= FIRST_PSEUDO_REGISTER
1877 /* Don't GCSE something if we can't do a reg/reg copy. */
1878 && can_copy_p [GET_MODE (dest)]
1879 /* Is SET_SRC something we want to gcse? */
1880 && want_to_gcse_p (src))
1882 /* An expression is not anticipatable if its operands are
1883 modified before this insn. */
1884 int antic_p = oprs_anticipatable_p (src, insn);
1885 /* An expression is not available if its operands are
1886 subsequently modified, including this insn. */
1887 int avail_p = oprs_available_p (src, insn);
1889 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1892 /* Record sets for constant/copy propagation. */
1893 else if (set_p
1894 && regno >= FIRST_PSEUDO_REGISTER
1895 && ((GET_CODE (src) == REG
1896 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1897 && can_copy_p [GET_MODE (dest)])
1898 || GET_CODE (src) == CONST_INT
1899 || GET_CODE (src) == SYMBOL_REF
1900 || GET_CODE (src) == CONST_DOUBLE)
1901 /* A copy is not available if its src or dest is subsequently
1902 modified. Here we want to search from INSN+1 on, but
1903 oprs_available_p searches from INSN on. */
1904 && (insn == BLOCK_END (BLOCK_NUM (insn))
1905 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1906 && oprs_available_p (pat, tmp))))
1907 insert_set_in_table (pat, insn);
1911 static void
1912 hash_scan_clobber (x, insn)
1913 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1915 /* Currently nothing to do. */
1918 static void
1919 hash_scan_call (x, insn)
1920 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1922 /* Currently nothing to do. */
1925 /* Process INSN and add hash table entries as appropriate.
1927 Only available expressions that set a single pseudo-reg are recorded.
1929 Single sets in a PARALLEL could be handled, but it's an extra complication
1930 that isn't dealt with right now. The trick is handling the CLOBBERs that
1931 are also in the PARALLEL. Later.
1933 If SET_P is non-zero, this is for the assignment hash table,
1934 otherwise it is for the expression hash table.
1935 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1936 not record any expressions. */
1938 static void
1939 hash_scan_insn (insn, set_p, in_libcall_block)
1940 rtx insn;
1941 int set_p;
1942 int in_libcall_block;
1944 rtx pat = PATTERN (insn);
1945 int i;
1947 /* Pick out the sets of INSN; for the other forms of instruction,
1948 record what has been modified. */
1950 if (GET_CODE (pat) == SET && ! in_libcall_block)
1952 /* Ignore obvious no-ops. */
1953 if (SET_SRC (pat) != SET_DEST (pat))
1954 hash_scan_set (pat, insn, set_p);
1956 else if (GET_CODE (pat) == PARALLEL)
1957 for (i = 0; i < XVECLEN (pat, 0); i++)
1959 rtx x = XVECEXP (pat, 0, i);
1961 if (GET_CODE (x) == SET)
1963 if (GET_CODE (SET_SRC (x)) == CALL)
1964 hash_scan_call (SET_SRC (x), insn);
1966 else if (GET_CODE (x) == CLOBBER)
1967 hash_scan_clobber (x, insn);
1968 else if (GET_CODE (x) == CALL)
1969 hash_scan_call (x, insn);
1972 else if (GET_CODE (pat) == CLOBBER)
1973 hash_scan_clobber (pat, insn);
1974 else if (GET_CODE (pat) == CALL)
1975 hash_scan_call (pat, insn);
1978 static void
1979 dump_hash_table (file, name, table, table_size, total_size)
1980 FILE *file;
1981 const char *name;
1982 struct expr **table;
1983 int table_size, total_size;
1985 int i;
1986 /* Flattened out table, so it's printed in proper order. */
1987 struct expr **flat_table;
1988 unsigned int *hash_val;
1989 struct expr *expr;
1991 flat_table
1992 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
1993 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
1995 for (i = 0; i < table_size; i++)
1996 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1998 flat_table[expr->bitmap_index] = expr;
1999 hash_val[expr->bitmap_index] = i;
2002 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2003 name, table_size, total_size);
2005 for (i = 0; i < total_size; i++)
2006 if (flat_table[i] != 0)
2008 expr = flat_table[i];
2009 fprintf (file, "Index %d (hash value %d)\n ",
2010 expr->bitmap_index, hash_val[i]);
2011 print_rtl (file, expr->expr);
2012 fprintf (file, "\n");
2015 fprintf (file, "\n");
2017 free (flat_table);
2018 free (hash_val);
2021 /* Record register first/last/block set information for REGNO in INSN.
2023 reg_first_set records the first place in the block where the register
2024 is set and is used to compute "anticipatability".
2026 reg_last_set records the last place in the block where the register
2027 is set and is used to compute "availability".
2029 reg_set_in_block records whether the register is set in the block
2030 and is used to compute "transparency". */
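/* For instance, if a block contains

       insn 10:  (set (reg 105) ...)
       insn 14:  (set (reg 105) ...)

   then after scanning the block, reg_first_set[105] holds the CUID of
   insn 10, reg_last_set[105] holds the CUID of insn 14, and bit 105 is
   set in reg_set_in_block for this block. */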
2032 static void
2033 record_last_reg_set_info (insn, regno)
2034 rtx insn;
2035 int regno;
2037 if (reg_first_set[regno] == NEVER_SET)
2038 reg_first_set[regno] = INSN_CUID (insn);
2040 reg_last_set[regno] = INSN_CUID (insn);
2041 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2044 /* Record memory first/last/block set information for INSN. */
2046 static void
2047 record_last_mem_set_info (insn)
2048 rtx insn;
2050 if (mem_first_set == NEVER_SET)
2051 mem_first_set = INSN_CUID (insn);
2053 mem_last_set = INSN_CUID (insn);
2054 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2057 /* Called from compute_hash_table via note_stores to handle one
2058 SET or CLOBBER in an insn. DATA is really the instruction in which
2059 the SET is taking place. */
2061 static void
2062 record_last_set_info (dest, setter, data)
2063 rtx dest, setter ATTRIBUTE_UNUSED;
2064 void *data;
2066 rtx last_set_insn = (rtx) data;
2068 if (GET_CODE (dest) == SUBREG)
2069 dest = SUBREG_REG (dest);
2071 if (GET_CODE (dest) == REG)
2072 record_last_reg_set_info (last_set_insn, REGNO (dest));
2073 else if (GET_CODE (dest) == MEM
2074 /* Ignore pushes, they clobber nothing. */
2075 && ! push_operand (dest, GET_MODE (dest)))
2076 record_last_mem_set_info (last_set_insn);
2079 /* Top level function to create an expression or assignment hash table.
2081 Expression entries are placed in the hash table if
2082 - they are of the form (set (pseudo-reg) src),
2083 - src is something we want to perform GCSE on,
2084 - none of the operands are subsequently modified in the block
2086 Assignment entries are placed in the hash table if
2087 - they are of the form (set (pseudo-reg) src),
2088 - src is something we want to perform const/copy propagation on,
2089 - none of the operands or target are subsequently modified in the block
2091 Currently src must be a pseudo-reg or a const_int.
2093 F is the first insn.
2094 SET_P is non-zero for computing the assignment hash table. */
2096 static void
2097 compute_hash_table (set_p)
2098 int set_p;
2100 int bb;
2102 /* While we compute the hash table we also compute a bit array of which
2103 registers are set in which blocks.
2104 We also compute which blocks set memory, in the absence of aliasing
2105 support [which is TODO].
2106 ??? This isn't needed during const/copy propagation, but it's cheap to
2107 compute. Later. */
2108 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2109 bzero ((char *) mem_set_in_block, n_basic_blocks);
2111 /* Some working arrays used to track first and last set in each block. */
2112 /* ??? One could use alloca here, but at some size a threshold is crossed
2113 beyond which one should use malloc. Are we at that threshold here? */
2114 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2115 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2117 for (bb = 0; bb < n_basic_blocks; bb++)
2119 rtx insn;
2120 unsigned int regno;
2121 int in_libcall_block;
2122 unsigned int i;
2124 /* First pass over the instructions records information used to
2125 determine when registers and memory are first and last set.
2126 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2127 could be moved to compute_sets since they currently don't change. */
2129 for (i = 0; i < max_gcse_regno; i++)
2130 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2132 mem_first_set = NEVER_SET;
2133 mem_last_set = NEVER_SET;
2135 for (insn = BLOCK_HEAD (bb);
2136 insn && insn != NEXT_INSN (BLOCK_END (bb));
2137 insn = NEXT_INSN (insn))
2139 #ifdef NON_SAVING_SETJMP
2140 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2141 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2143 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2144 record_last_reg_set_info (insn, regno);
2145 continue;
2147 #endif
2149 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2150 continue;
2152 if (GET_CODE (insn) == CALL_INSN)
2154 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2155 if ((call_used_regs[regno]
2156 && regno != STACK_POINTER_REGNUM
2157 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2158 && regno != HARD_FRAME_POINTER_REGNUM
2159 #endif
2160 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2161 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2162 #endif
2163 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2164 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2165 #endif
2167 && regno != FRAME_POINTER_REGNUM)
2168 || global_regs[regno])
2169 record_last_reg_set_info (insn, regno);
2171 if (! CONST_CALL_P (insn))
2172 record_last_mem_set_info (insn);
2175 note_stores (PATTERN (insn), record_last_set_info, insn);
2178 /* The next pass builds the hash table. */
2180 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2181 insn && insn != NEXT_INSN (BLOCK_END (bb));
2182 insn = NEXT_INSN (insn))
2183 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2185 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2186 in_libcall_block = 1;
2187 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2188 in_libcall_block = 0;
2189 hash_scan_insn (insn, set_p, in_libcall_block);
2193 free (reg_first_set);
2194 free (reg_last_set);
2196 /* Catch bugs early. */
2197 reg_first_set = reg_last_set = 0;
2200 /* Allocate space for the set hash table.
2201 N_INSNS is the number of instructions in the function.
2202 It is used to determine the number of buckets to use. */
2204 static void
2205 alloc_set_hash_table (n_insns)
2206 int n_insns;
2208 int n;
2210 set_hash_table_size = n_insns / 4;
2211 if (set_hash_table_size < 11)
2212 set_hash_table_size = 11;
2214 /* Attempt to maintain efficient use of hash table.
2215 Making it an odd number is simplest for now.
2216 ??? Later take some measurements. */
2217 set_hash_table_size |= 1;
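/* (Presumably the oddness helps because the hash is reduced modulo the
   table size: an odd size at least shares no factor of two with the
   hash values; a prime size would likely spread entries better still.) */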
2218 n = set_hash_table_size * sizeof (struct expr *);
2219 set_hash_table = (struct expr **) gmalloc (n);
2222 /* Free things allocated by alloc_set_hash_table. */
2224 static void
2225 free_set_hash_table ()
2227 free (set_hash_table);
2230 /* Compute the hash table for doing copy/const propagation. */
2232 static void
2233 compute_set_hash_table ()
2235 /* Initialize count of number of entries in hash table. */
2236 n_sets = 0;
2237 bzero ((char *) set_hash_table,
2238 set_hash_table_size * sizeof (struct expr *));
2240 compute_hash_table (1);
2243 /* Allocate space for the expression hash table.
2244 N_INSNS is the number of instructions in the function.
2245 It is used to determine the number of buckets to use. */
2247 static void
2248 alloc_expr_hash_table (n_insns)
2249 unsigned int n_insns;
2251 int n;
2253 expr_hash_table_size = n_insns / 2;
2254 /* Make sure the amount is usable. */
2255 if (expr_hash_table_size < 11)
2256 expr_hash_table_size = 11;
2258 /* Attempt to maintain efficient use of hash table.
2259 Making it an odd number is simplest for now.
2260 ??? Later take some measurements. */
2261 expr_hash_table_size |= 1;
2262 n = expr_hash_table_size * sizeof (struct expr *);
2263 expr_hash_table = (struct expr **) gmalloc (n);
2266 /* Free things allocated by alloc_expr_hash_table. */
2268 static void
2269 free_expr_hash_table ()
2271 free (expr_hash_table);
2274 /* Compute the hash table for doing GCSE. */
2276 static void
2277 compute_expr_hash_table ()
2279 /* Initialize count of number of entries in hash table. */
2280 n_exprs = 0;
2281 bzero ((char *) expr_hash_table,
2282 expr_hash_table_size * sizeof (struct expr *));
2284 compute_hash_table (0);
2287 /* Expression tracking support. */
2289 /* Lookup pattern PAT in the expression table.
2290 The result is a pointer to the table entry, or NULL if not found. */
2292 static struct expr *
2293 lookup_expr (pat)
2294 rtx pat;
2296 int do_not_record_p;
2297 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2298 expr_hash_table_size);
2299 struct expr *expr;
2301 if (do_not_record_p)
2302 return NULL;
2304 expr = expr_hash_table[hash];
2306 while (expr && ! expr_equiv_p (expr->expr, pat))
2307 expr = expr->next_same_hash;
2309 return expr;
2312 /* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2313 matches it, otherwise return the first entry for REGNO. The result is a
2314 pointer to the table entry, or NULL if not found. */
2316 static struct expr *
2317 lookup_set (regno, pat)
2318 unsigned int regno;
2319 rtx pat;
2321 unsigned int hash = hash_set (regno, set_hash_table_size);
2322 struct expr *expr;
2324 expr = set_hash_table[hash];
2326 if (pat)
2328 while (expr && ! expr_equiv_p (expr->expr, pat))
2329 expr = expr->next_same_hash;
2331 else
2333 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2334 expr = expr->next_same_hash;
2337 return expr;
2340 /* Return the next entry for REGNO in list EXPR. */
2342 static struct expr *
2343 next_set (regno, expr)
2344 unsigned int regno;
2345 struct expr *expr;
2348 expr = expr->next_same_hash;
2349 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2351 return expr;
2354 /* Reset tables used to keep track of what's still available [since the
2355 start of the block]. */
2357 static void
2358 reset_opr_set_tables ()
2360 /* Maintain a bitmap of which regs have been set since beginning of
2361 the block. */
2362 sbitmap_zero (reg_set_bitmap);
2364 /* Also keep a record of the last instruction to modify memory.
2365 For now this is very trivial, we only record whether any memory
2366 location has been modified. */
2367 mem_last_set = 0;
2370 /* Return non-zero if the operands of X are not set before INSN in
2371 INSN's basic block. */
2373 static int
2374 oprs_not_set_p (x, insn)
2375 rtx x, insn;
2377 int i, j;
2378 enum rtx_code code;
2379 const char *fmt;
2381 if (x == 0)
2382 return 1;
2384 code = GET_CODE (x);
2385 switch (code)
2387 case PC:
2388 case CC0:
2389 case CONST:
2390 case CONST_INT:
2391 case CONST_DOUBLE:
2392 case SYMBOL_REF:
2393 case LABEL_REF:
2394 case ADDR_VEC:
2395 case ADDR_DIFF_VEC:
2396 return 1;
2398 case MEM:
2399 if (mem_last_set != 0)
2400 return 0;
2401 else
2402 return oprs_not_set_p (XEXP (x, 0), insn);
2404 case REG:
2405 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2407 default:
2408 break;
2411 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2413 if (fmt[i] == 'e')
2415 /* If we are about to do the last recursive call
2416 needed at this level, change it into iteration.
2417 This function is called enough to be worth it. */
2418 if (i == 0)
2419 return oprs_not_set_p (XEXP (x, i), insn);
2421 if (! oprs_not_set_p (XEXP (x, i), insn))
2422 return 0;
2424 else if (fmt[i] == 'E')
2425 for (j = 0; j < XVECLEN (x, i); j++)
2426 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2427 return 0;
2430 return 1;
2433 /* Mark things set by a CALL. */
2435 static void
2436 mark_call (insn)
2437 rtx insn;
2439 mem_last_set = INSN_CUID (insn);
2442 /* Mark things set by a SET. */
2444 static void
2445 mark_set (pat, insn)
2446 rtx pat, insn;
2448 rtx dest = SET_DEST (pat);
2450 while (GET_CODE (dest) == SUBREG
2451 || GET_CODE (dest) == ZERO_EXTRACT
2452 || GET_CODE (dest) == SIGN_EXTRACT
2453 || GET_CODE (dest) == STRICT_LOW_PART)
2454 dest = XEXP (dest, 0);
2456 if (GET_CODE (dest) == REG)
2457 SET_BIT (reg_set_bitmap, REGNO (dest));
2458 else if (GET_CODE (dest) == MEM)
2459 mem_last_set = INSN_CUID (insn);
2461 if (GET_CODE (SET_SRC (pat)) == CALL)
2462 mark_call (insn);
2465 /* Record things set by a CLOBBER. */
2467 static void
2468 mark_clobber (pat, insn)
2469 rtx pat, insn;
2471 rtx clob = XEXP (pat, 0);
2473 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2474 clob = XEXP (clob, 0);
2476 if (GET_CODE (clob) == REG)
2477 SET_BIT (reg_set_bitmap, REGNO (clob));
2478 else
2479 mem_last_set = INSN_CUID (insn);
2482 /* Record things set by INSN.
2483 This data is used by oprs_not_set_p. */
2485 static void
2486 mark_oprs_set (insn)
2487 rtx insn;
2489 rtx pat = PATTERN (insn);
2490 int i;
2492 if (GET_CODE (pat) == SET)
2493 mark_set (pat, insn);
2494 else if (GET_CODE (pat) == PARALLEL)
2495 for (i = 0; i < XVECLEN (pat, 0); i++)
2497 rtx x = XVECEXP (pat, 0, i);
2499 if (GET_CODE (x) == SET)
2500 mark_set (x, insn);
2501 else if (GET_CODE (x) == CLOBBER)
2502 mark_clobber (x, insn);
2503 else if (GET_CODE (x) == CALL)
2504 mark_call (insn);
2507 else if (GET_CODE (pat) == CLOBBER)
2508 mark_clobber (pat, insn);
2509 else if (GET_CODE (pat) == CALL)
2510 mark_call (insn);
2514 /* Classic GCSE reaching definition support. */
2516 /* Allocate reaching def variables. */
2518 static void
2519 alloc_rd_mem (n_blocks, n_insns)
2520 int n_blocks, n_insns;
2522 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2523 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2525 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2526 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2528 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2529 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2531 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2532 sbitmap_vector_zero (rd_out, n_basic_blocks);
2535 /* Free reaching def variables. */
2537 static void
2538 free_rd_mem ()
2540 free (rd_kill);
2541 free (rd_gen);
2542 free (reaching_defs);
2543 free (rd_out);
2546 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2548 static void
2549 handle_rd_kill_set (insn, regno, bb)
2550 rtx insn;
2551 int regno, bb;
2553 struct reg_set *this_reg;
2555 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2556 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2557 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2560 /* Compute the set of kills for reaching definitions. */
2562 static void
2563 compute_kill_rd ()
2565 int bb, cuid;
2566 int regno, i;
2568 /* For each block
2569 For each set bit in `gen' of the block (i.e each insn which
2570 generates a definition in the block)
2571 Call the reg set by the insn corresponding to that bit regx
2572 Look at the linked list starting at reg_set_table[regx]
2573 For each setting of regx in the linked list, which is not in
2574 this block
2575 Set the bit in `kill' corresponding to that insn. */
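/* For example, if reg 105 is set by insn A in block 2 and by insn B in
   block 5, then the definition in block 2 gets B's bit set in
   rd_kill[2], and the definition in block 5 gets A's bit set in
   rd_kill[5]: each block kills every other block's definitions of the
   registers it defines. */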
2576 for (bb = 0; bb < n_basic_blocks; bb++)
2577 for (cuid = 0; cuid < max_cuid; cuid++)
2578 if (TEST_BIT (rd_gen[bb], cuid))
2580 rtx insn = CUID_INSN (cuid);
2581 rtx pat = PATTERN (insn);
2583 if (GET_CODE (insn) == CALL_INSN)
2585 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2587 if ((call_used_regs[regno]
2588 && regno != STACK_POINTER_REGNUM
2589 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2590 && regno != HARD_FRAME_POINTER_REGNUM
2591 #endif
2592 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2593 && ! (regno == ARG_POINTER_REGNUM
2594 && fixed_regs[regno])
2595 #endif
2596 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2597 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2598 #endif
2599 && regno != FRAME_POINTER_REGNUM)
2600 || global_regs[regno])
2601 handle_rd_kill_set (insn, regno, bb);
2605 if (GET_CODE (pat) == PARALLEL)
2607 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2609 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2611 if ((code == SET || code == CLOBBER)
2612 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2613 handle_rd_kill_set (insn,
2614 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2615 bb);
2618 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2619 /* Each setting of this register outside of this block
2620 must be marked in the set of kills in this block. */
2621 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2625 /* Compute the reaching definitions as in
2626 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2627 Chapter 10. It is the same algorithm as used for computing available
2628 expressions but applied to the gens and kills of reaching definitions. */
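/* A sketch of the iteration performed below, in dataflow notation
   (bit vectors indexed by insn CUID):

       in[bb]  = union over predecessors p of out[p]
       out[bb] = gen[bb] | (in[bb] & ~kill[bb])

   repeated until no out[bb] changes. */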
2630 static void
2631 compute_rd ()
2633 int bb, changed, passes;
2635 for (bb = 0; bb < n_basic_blocks; bb++)
2636 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2638 passes = 0;
2639 changed = 1;
2640 while (changed)
2642 changed = 0;
2643 for (bb = 0; bb < n_basic_blocks; bb++)
2645 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2646 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2647 reaching_defs[bb], rd_kill[bb]);
2649 passes++;
2652 if (gcse_file)
2653 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2656 /* Classic GCSE available expression support. */
2658 /* Allocate memory for available expression computation. */
2660 static void
2661 alloc_avail_expr_mem (n_blocks, n_exprs)
2662 int n_blocks, n_exprs;
2664 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2665 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2667 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2668 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2670 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2671 sbitmap_vector_zero (ae_in, n_basic_blocks);
2673 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2674 sbitmap_vector_zero (ae_out, n_basic_blocks);
2676 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2677 sbitmap_ones (u_bitmap);
2680 static void
2681 free_avail_expr_mem ()
2683 free (ae_kill);
2684 free (ae_gen);
2685 free (ae_in);
2686 free (ae_out);
2687 free (u_bitmap);
2690 /* Compute the set of available expressions generated in each basic block. */
2692 static void
2693 compute_ae_gen ()
2695 unsigned int i;
2696 struct expr *expr;
2697 struct occr *occr;
2699 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2700 This is all we have to do because an expression is not recorded if it
2701 is not available, and the only expressions we want to work with are the
2702 ones that are recorded. */
2703 for (i = 0; i < expr_hash_table_size; i++)
2704 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2705 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2706 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2709 /* Return non-zero if expression X is killed in BB. */
2711 static int
2712 expr_killed_p (x, bb)
2713 rtx x;
2714 int bb;
2716 int i, j;
2717 enum rtx_code code;
2718 const char *fmt;
2720 if (x == 0)
2721 return 1;
2723 code = GET_CODE (x);
2724 switch (code)
2726 case REG:
2727 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2729 case MEM:
2730 if (mem_set_in_block[bb])
2731 return 1;
2732 else
2733 return expr_killed_p (XEXP (x, 0), bb);
2735 case PC:
2736 case CC0: /*FIXME*/
2737 case CONST:
2738 case CONST_INT:
2739 case CONST_DOUBLE:
2740 case SYMBOL_REF:
2741 case LABEL_REF:
2742 case ADDR_VEC:
2743 case ADDR_DIFF_VEC:
2744 return 0;
2746 default:
2747 break;
2750 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2752 if (fmt[i] == 'e')
2754 /* If we are about to do the last recursive call
2755 needed at this level, change it into iteration.
2756 This function is called enough to be worth it. */
2757 if (i == 0)
2758 return expr_killed_p (XEXP (x, i), bb);
2759 else if (expr_killed_p (XEXP (x, i), bb))
2760 return 1;
2762 else if (fmt[i] == 'E')
2763 for (j = 0; j < XVECLEN (x, i); j++)
2764 if (expr_killed_p (XVECEXP (x, i, j), bb))
2765 return 1;
2768 return 0;
2771 /* Compute the set of available expressions killed in each basic block. */
2773 static void
2774 compute_ae_kill (ae_gen, ae_kill)
2775 sbitmap *ae_gen, *ae_kill;
2777 int bb;
2778 unsigned int i;
2779 struct expr *expr;
2781 for (bb = 0; bb < n_basic_blocks; bb++)
2782 for (i = 0; i < expr_hash_table_size; i++)
2783 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2785 /* Skip EXPR if generated in this block. */
2786 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2787 continue;
2789 if (expr_killed_p (expr->expr, bb))
2790 SET_BIT (ae_kill[bb], expr->bitmap_index);
2794 /* Actually perform the Classic GCSE optimizations. */
2796 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2798 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2799 as a positive reach. We want to do this when there are two computations
2800 of the expression in the block.
2802 VISITED is a pointer to a working buffer for tracking which BB's have
2803 been visited. It is NULL for the top-level call.
2805 We treat reaching expressions that go through blocks containing the same
2806 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2807 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2808 2 as not reaching. The intent is to improve the probability of finding
2809 only one reaching expression and to reduce register lifetimes by picking
2810 the closest such expression. */
2812 static int
2813 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2814 struct occr *occr;
2815 struct expr *expr;
2816 int bb;
2817 int check_self_loop;
2818 char *visited;
2820 edge pred;
2822 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2824 int pred_bb = pred->src->index;
2826 if (visited[pred_bb])
2827 /* This predecessor has already been visited. Nothing to do. */
2829 else if (pred_bb == bb)
2831 /* BB loops on itself. */
2832 if (check_self_loop
2833 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2834 && BLOCK_NUM (occr->insn) == pred_bb)
2835 return 1;
2837 visited[pred_bb] = 1;
2840 /* Ignore this predecessor if it kills the expression. */
2841 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2842 visited[pred_bb] = 1;
2844 /* Does this predecessor generate this expression? */
2845 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2847 /* Is this the occurrence we're looking for?
2848 Note that there's only one generating occurrence per block
2849 so we just need to check the block number. */
2850 if (BLOCK_NUM (occr->insn) == pred_bb)
2851 return 1;
2853 visited[pred_bb] = 1;
2856 /* Neither gen nor kill. */
2857 else
2859 visited[pred_bb] = 1;
2860 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2861 visited))
2863 return 1;
2867 /* All paths have been checked. */
2868 return 0;
2871 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2872 memory allocated for that function is returned. */
2874 static int
2875 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2876 struct occr *occr;
2877 struct expr *expr;
2878 int bb;
2879 int check_self_loop;
2881 int rval;
2882 char *visited = (char *) xcalloc (n_basic_blocks, 1);
2884 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2886 free (visited);
2887 return rval;
2890 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2891 If there is more than one such instruction, return NULL.
2893 Called only by handle_avail_expr. */
2895 static rtx
2896 computing_insn (expr, insn)
2897 struct expr *expr;
2898 rtx insn;
2900 int bb = BLOCK_NUM (insn);
2902 if (expr->avail_occr->next == NULL)
2904 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2905 /* The available expression is actually itself
2906 (i.e. a loop in the flow graph) so do nothing. */
2907 return NULL;
2909 /* (FIXME) This is the case where the pattern was created by an
2910 earlier substitution. */
2911 return expr->avail_occr->insn;
2913 else
2915 /* Pattern is computed more than once.
2916 Search backwards from this insn to see how many of these
2917 computations actually reach this insn. */
2918 struct occr *occr;
2919 rtx insn_computes_expr = NULL;
2920 int can_reach = 0;
2922 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2924 if (BLOCK_NUM (occr->insn) == bb)
2926 /* The expression is generated in this block.
2927 The only time we care about this is when the expression
2928 is generated later in the block [and thus there's a loop].
2929 We let the normal cse pass handle the other cases. */
2930 if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2931 && expr_reaches_here_p (occr, expr, bb, 1))
2933 can_reach++;
2934 if (can_reach > 1)
2935 return NULL;
2937 insn_computes_expr = occr->insn;
2940 else if (expr_reaches_here_p (occr, expr, bb, 0))
2942 can_reach++;
2943 if (can_reach > 1)
2944 return NULL;
2946 insn_computes_expr = occr->insn;
2950 if (insn_computes_expr == NULL)
2951 abort ();
2953 return insn_computes_expr;
2957 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2958 Only called by can_disregard_other_sets. */
2960 static int
2961 def_reaches_here_p (insn, def_insn)
2962 rtx insn, def_insn;
2964 rtx reg;
2966 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2967 return 1;
2969 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2971 if (INSN_CUID (def_insn) < INSN_CUID (insn))
2973 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2974 return 1;
2975 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2976 reg = XEXP (PATTERN (def_insn), 0);
2977 else if (GET_CODE (PATTERN (def_insn)) == SET)
2978 reg = SET_DEST (PATTERN (def_insn));
2979 else
2980 abort ();
2982 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2984 else
2985 return 0;
2988 return 0;
2991 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The
2992 value returned is the number of definitions that reach INSN. Returning a
2993 value of zero means that [maybe] more than one definition reaches INSN and
2994 the caller can't perform whatever optimization it is trying to do; i.e. it is
2995 always safe to return zero. */
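/* For example, if reg 105 is set in blocks 2 and 7 but only the
   definition in block 2 reaches INSN, 1 is returned and *ADDR_THIS_REG
   is left pointing at that definition's reg_set entry. */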
2997 static int
2998 can_disregard_other_sets (addr_this_reg, insn, for_combine)
2999 struct reg_set **addr_this_reg;
3000 rtx insn;
3001 int for_combine;
3003 int number_of_reaching_defs = 0;
3004 struct reg_set *this_reg;
3006 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3007 if (def_reaches_here_p (insn, this_reg->insn))
3009 number_of_reaching_defs++;
3010 /* Ignore parallels for now. */
3011 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3012 return 0;
3014 if (!for_combine
3015 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3016 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3017 SET_SRC (PATTERN (insn)))))
3018 /* A setting of the reg to a different value reaches INSN. */
3019 return 0;
3021 if (number_of_reaching_defs > 1)
3023 /* We are OK only if this setting assigns the register the same
3024 value as the previous setting did, and the setting reaches the
3025 insn on which we are trying to do the substitution. */
3027 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3028 return 0;
3029 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3030 SET_SRC (PATTERN (insn))))
3031 return 0;
3034 *addr_this_reg = this_reg;
3037 return number_of_reaching_defs;
3040 /* Expression computed by insn is available and the substitution is legal,
3041 so try to perform the substitution.
3043 The result is non-zero if any changes were made. */
3045 static int
3046 handle_avail_expr (insn, expr)
3047 rtx insn;
3048 struct expr *expr;
3050 rtx pat, insn_computes_expr;
3051 rtx to;
3052 struct reg_set *this_reg;
3053 int found_setting, use_src;
3054 int changed = 0;
3056 /* We only handle the case where one computation of the expression
3057 reaches this instruction. */
3058 insn_computes_expr = computing_insn (expr, insn);
3059 if (insn_computes_expr == NULL)
3060 return 0;
3062 found_setting = 0;
3063 use_src = 0;
3065 /* At this point we know only one computation of EXPR outside of this
3066 block reaches this insn. Now try to find a register that the
3067 expression is computed into. */
3068 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3070 /* This is the case when the available expression that reaches
3071 here has already been handled as an available expression. */
3072 unsigned int regnum_for_replacing
3073 = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3075 /* If the register was created by GCSE we can't use `reg_set_table',
3076 however we know it's set only once. */
3077 if (regnum_for_replacing >= max_gcse_regno
3078 /* If the register the expression is computed into is set only once,
3079 or only one set reaches this insn, we can use it. */
3080 || (((this_reg = reg_set_table[regnum_for_replacing]),
3081 this_reg->next == NULL)
3082 || can_disregard_other_sets (&this_reg, insn, 0)))
3084 use_src = 1;
3085 found_setting = 1;
3089 if (!found_setting)
3091 unsigned int regnum_for_replacing
3092 = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3094 /* This shouldn't happen. */
3095 if (regnum_for_replacing >= max_gcse_regno)
3096 abort ();
3098 this_reg = reg_set_table[regnum_for_replacing];
3100 /* If the register the expression is computed into is set only once,
3101 or only one set reaches this insn, use it. */
3102 if (this_reg->next == NULL
3103 || can_disregard_other_sets (&this_reg, insn, 0))
3104 found_setting = 1;
3107 if (found_setting)
3109 pat = PATTERN (insn);
3110 if (use_src)
3111 to = SET_SRC (PATTERN (insn_computes_expr));
3112 else
3113 to = SET_DEST (PATTERN (insn_computes_expr));
3114 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3116 /* We should be able to ignore the return code from validate_change but
3117 to play it safe we check. */
3118 if (changed)
3120 gcse_subst_count++;
3121 if (gcse_file != NULL)
3123 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3124 INSN_UID (insn));
3125 fprintf (gcse_file, " reg %d %s insn %d\n",
3126 REGNO (to), use_src ? "from" : "set in",
3127 INSN_UID (insn_computes_expr));
3132 /* The register that the expr is computed into is set more than once. */
3133 else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/)
3135 /* Insert an insn after INSN_COMPUTES_EXPR that copies the register
3136 set there (call it REGB) into a new pseudo register REGN. From
3137 there until the end of the basic block, or until REGB is set
3138 again, all uses of REGB may be replaced with REGN. */
3139 rtx new_insn;
3141 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3143 /* Generate the new insn. */
3144 /* ??? If the change fails, we return 0, even though we created
3145 an insn. I think this is ok. */
3146 new_insn
3147 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3148 SET_DEST (PATTERN
3149 (insn_computes_expr))),
3150 insn_computes_expr);
3152 /* Keep block number table up to date. */
3153 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3155 /* Keep register set table up to date. */
3156 record_one_set (REGNO (to), new_insn);
3158 gcse_create_count++;
3159 if (gcse_file != NULL)
3161 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3162 INSN_UID (NEXT_INSN (insn_computes_expr)),
3163 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3164 fprintf (gcse_file, ", computed in insn %d,\n",
3165 INSN_UID (insn_computes_expr));
3166 fprintf (gcse_file, " into newly allocated reg %d\n",
3167 REGNO (to));
3170 pat = PATTERN (insn);
3172 /* Do register replacement for INSN. */
3173 changed = validate_change (insn, &SET_SRC (pat),
3174 SET_DEST (PATTERN
3175 (NEXT_INSN (insn_computes_expr))),
3178 /* We should be able to ignore the return code from validate_change but
3179 to play it safe we check. */
3180 if (changed)
3182 gcse_subst_count++;
3183 if (gcse_file != NULL)
3185 fprintf (gcse_file,
3186 "GCSE: Replacing the source in insn %d with reg %d ",
3187 INSN_UID (insn),
3188 REGNO (SET_DEST (PATTERN (NEXT_INSN
3189 (insn_computes_expr)))));
3190 fprintf (gcse_file, "set in insn %d\n",
3191 INSN_UID (insn_computes_expr));
3196 return changed;
3199 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3200 the dataflow analysis has been done.
3202 The result is non-zero if a change was made. */
3204 static int
3205 classic_gcse ()
3207 int bb, changed;
3208 rtx insn;
3210 /* Note we start at block 1. */
3212 changed = 0;
3213 for (bb = 1; bb < n_basic_blocks; bb++)
3215 /* Reset tables used to keep track of what's still valid [since the
3216 start of the block]. */
3217 reset_opr_set_tables ();
3219 for (insn = BLOCK_HEAD (bb);
3220 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3221 insn = NEXT_INSN (insn))
3223 /* Is insn of form (set (pseudo-reg) ...)? */
3224 if (GET_CODE (insn) == INSN
3225 && GET_CODE (PATTERN (insn)) == SET
3226 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3227 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3229 rtx pat = PATTERN (insn);
3230 rtx src = SET_SRC (pat);
3231 struct expr *expr;
3233 if (want_to_gcse_p (src)
3234 /* Is the expression recorded? */
3235 && ((expr = lookup_expr (src)) != NULL)
3236 /* Is the expression available [at the start of the
3237 block]? */
3238 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3239 /* Are the operands unchanged since the start of the
3240 block? */
3241 && oprs_not_set_p (src, insn))
3242 changed |= handle_avail_expr (insn, expr);
3245 /* Keep track of everything modified by this insn. */
3246 /* ??? Need to be careful w.r.t. mods done to INSN. */
3247 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3248 mark_oprs_set (insn);
3252 return changed;
3255 /* Top level routine to perform one classic GCSE pass.
3257 Return non-zero if a change was made. */
3259 static int
3260 one_classic_gcse_pass (pass)
3261 int pass;
3263 int changed = 0;
3265 gcse_subst_count = 0;
3266 gcse_create_count = 0;
3268 alloc_expr_hash_table (max_cuid);
3269 alloc_rd_mem (n_basic_blocks, max_cuid);
3270 compute_expr_hash_table ();
3271 if (gcse_file)
3272 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3273 expr_hash_table_size, n_exprs);
3275 if (n_exprs > 0)
3277 compute_kill_rd ();
3278 compute_rd ();
3279 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3280 compute_ae_gen ();
3281 compute_ae_kill (ae_gen, ae_kill);
3282 compute_available (ae_gen, ae_kill, ae_out, ae_in);
3283 changed = classic_gcse ();
3284 free_avail_expr_mem ();
3287 free_rd_mem ();
3288 free_expr_hash_table ();
3290 if (gcse_file)
3292 fprintf (gcse_file, "\n");
3293 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3294 current_function_name, pass, bytes_used, gcse_subst_count);
3295 fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3298 return changed;
3301 /* Compute copy/constant propagation working variables. */
3303 /* Local properties of assignments. */
3304 static sbitmap *cprop_pavloc;
3305 static sbitmap *cprop_absaltered;
3307 /* Global properties of assignments (computed from the local properties). */
3308 static sbitmap *cprop_avin;
3309 static sbitmap *cprop_avout;
3311 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3312 basic blocks. N_SETS is the number of sets. */
3314 static void
3315 alloc_cprop_mem (n_blocks, n_sets)
3316 int n_blocks, n_sets;
3318 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3319 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3321 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3322 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3325 /* Free vars used by copy/const propagation. */
3327 static void
3328 free_cprop_mem ()
3330 free (cprop_pavloc);
3331 free (cprop_absaltered);
3332 free (cprop_avin);
3333 free (cprop_avout);
3336 /* For each block, compute whether X is transparent. X is either an
3337 expression or an assignment [though we don't care which, for this context
3338 an assignment is treated as an expression]. For each block where an
3339 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3340 bit in BMAP. */
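/* In other words, X is transparent in a block iff none of the
   registers it uses (and, for a MEM, no memory location) is set there.
   The SET_P flag lets the same walk build either a kill set (set bits)
   or a transparency set (clear bits). */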
3342 static void
3343 compute_transp (x, indx, bmap, set_p)
3344 rtx x;
3345 int indx;
3346 sbitmap *bmap;
3347 int set_p;
3349 int bb, i, j;
3350 enum rtx_code code;
3351 reg_set *r;
3352 const char *fmt;
3354 /* repeat is used to turn tail-recursion into iteration since GCC
3355 can't do it when there's no return value. */
3356 repeat:
3358 if (x == 0)
3359 return;
3361 code = GET_CODE (x);
3362 switch (code)
3364 case REG:
3365 if (set_p)
3367 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3369 for (bb = 0; bb < n_basic_blocks; bb++)
3370 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3371 SET_BIT (bmap[bb], indx);
3373 else
3375 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3376 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3379 else
3381 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3383 for (bb = 0; bb < n_basic_blocks; bb++)
3384 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3385 RESET_BIT (bmap[bb], indx);
3387 else
3389 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3390 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3394 return;
3396 case MEM:
3397 if (set_p)
3399 for (bb = 0; bb < n_basic_blocks; bb++)
3400 if (mem_set_in_block[bb])
3401 SET_BIT (bmap[bb], indx);
3403 else
3405 for (bb = 0; bb < n_basic_blocks; bb++)
3406 if (mem_set_in_block[bb])
3407 RESET_BIT (bmap[bb], indx);
3410 x = XEXP (x, 0);
3411 goto repeat;
3413 case PC:
3414 case CC0: /*FIXME*/
3415 case CONST:
3416 case CONST_INT:
3417 case CONST_DOUBLE:
3418 case SYMBOL_REF:
3419 case LABEL_REF:
3420 case ADDR_VEC:
3421 case ADDR_DIFF_VEC:
3422 return;
3424 default:
3425 break;
3428 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3430 if (fmt[i] == 'e')
3432 /* If we are about to do the last recursive call
3433 needed at this level, change it into iteration.
3434 This function is called enough to be worth it. */
3435 if (i == 0)
3437 x = XEXP (x, i);
3438 goto repeat;
3441 compute_transp (XEXP (x, i), indx, bmap, set_p);
3443 else if (fmt[i] == 'E')
3444 for (j = 0; j < XVECLEN (x, i); j++)
3445 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3449 /* Top level routine to do the dataflow analysis needed by copy/const
3450 propagation. */
3452 static void
3453 compute_cprop_data ()
3455 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3456 compute_available (cprop_pavloc, cprop_absaltered,
3457 cprop_avout, cprop_avin);
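/* The net effect is the standard forward "available expressions"
   system, applied here to assignments: an assignment is available on
   entry to a block iff it is available on exit from every predecessor,
   and available on exit iff it is locally generated or comes in and
   survives (is not absolutely altered in the block). */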
3460 /* Copy/constant propagation. */
3462 /* Maximum number of register uses in an insn that we handle. */
3463 #define MAX_USES 8
3465 /* Table of uses found in an insn.
3466 Allocated statically to avoid alloc/free complexity and overhead. */
3467 static struct reg_use reg_use_table[MAX_USES];
3469 /* Index into `reg_use_table' while building it. */
3470 static int reg_use_count;
3472 /* Set up a list of register numbers used in INSN. The found uses are stored
3473 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3474 and contains the number of uses in the table upon exit.
3476 ??? If a register appears multiple times we will record it multiple times.
3477 This doesn't hurt anything but it will slow things down. */
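/* For example, scanning (set (reg 100) (plus (reg 101) (reg 102)))
   records uses of regs 101 and 102 but not of reg 100, since a
   SET_DEST that is a REG is not a use; only a MEM destination has the
   registers in its address recorded. */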
3479 static void
3480 find_used_regs (x)
3481 rtx x;
3483 int i, j;
3484 enum rtx_code code;
3485 const char *fmt;
3487 /* repeat is used to turn tail-recursion into iteration since GCC
3488 can't do it when there's no return value. */
3489 repeat:
3491 if (x == 0)
3492 return;
3494 code = GET_CODE (x);
3495 switch (code)
3497 case REG:
3498 if (reg_use_count == MAX_USES)
3499 return;
3501 reg_use_table[reg_use_count].reg_rtx = x;
3502 reg_use_count++;
3503 return;
3505 case MEM:
3506 x = XEXP (x, 0);
3507 goto repeat;
3509 case PC:
3510 case CC0:
3511 case CONST:
3512 case CONST_INT:
3513 case CONST_DOUBLE:
3514 case SYMBOL_REF:
3515 case LABEL_REF:
3516 case CLOBBER:
3517 case ADDR_VEC:
3518 case ADDR_DIFF_VEC:
3519 case ASM_INPUT: /*FIXME*/
3520 return;
3522 case SET:
3523 if (GET_CODE (SET_DEST (x)) == MEM)
3524 find_used_regs (SET_DEST (x));
3525 x = SET_SRC (x);
3526 goto repeat;
3528 default:
3529 break;
3532 /* Recursively scan the operands of this expression. */
3534 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3536 if (fmt[i] == 'e')
3538 /* If we are about to do the last recursive call
3539 needed at this level, change it into iteration.
3540 This function is called enough to be worth it. */
3541 if (i == 0)
3543 x = XEXP (x, 0);
3544 goto repeat;
3547 find_used_regs (XEXP (x, i));
3549 else if (fmt[i] == 'E')
3550 for (j = 0; j < XVECLEN (x, i); j++)
3551 find_used_regs (XVECEXP (x, i, j));
3555 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3556 Returns non-zero if successful. */
3558 static int
3559 try_replace_reg (from, to, insn)
3560 rtx from, to, insn;
3562 rtx note;
3563 rtx src;
3564 int success;
3565 rtx set;
3567 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3569 if (!note)
3570 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3572 /* If this fails we could try to simplify the result of the
3573 replacement and attempt to recognize the simplified insn.
3575 But we need a general simplify_rtx that doesn't have pass
3576 specific state variables. I'm not aware of one at the moment. */
3578 success = validate_replace_src (from, to, insn);
3579 set = single_set (insn);
3581 /* The replacement failed. Try to add a REG_EQUAL note so that we
3582 do not lose the information. */
3583 if (!success && !note)
3585 if (!set)
3586 return 0;
3588 note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3589 copy_rtx (SET_SRC (set)),
3590 REG_NOTES (insn));
3593 /* Always do the replacement in REG_EQUAL and REG_EQUIV notes. Also
3594 try to simplify them. */
3595 if (note)
3597 rtx simplified;
3599 src = XEXP (note, 0);
3600 replace_rtx (src, from, to);
3602 /* Try to simplify resulting note. */
3603 simplified = simplify_rtx (src);
3604 if (simplified)
3606 src = simplified;
3607 XEXP (note, 0) = src;
3610 /* A REG_EQUAL note may get simplified into a bare register.
3611 We don't allow that, so remove the note. This ought not to
3612 happen, because the code above should have synthesized a
3613 reg-reg move, but be on the safe side. */
3614 else if (REG_P (src))
3615 remove_note (insn, note);
3617 return success;
3620 /* Find a set of register REGNO that is available on entry to INSN's
3621 block. Return NULL if no such set is found. */
3623 static struct expr *
3624 find_avail_set (regno, insn)
3625 int regno;
3626 rtx insn;
3628 /* SET1 contains the last set found that can be returned to the caller for
3629 use in a substitution. */
3630 struct expr *set1 = 0;
3632 /* Loops are not possible here. To get a loop we would need two sets
3633 available at the start of the block containing INSN, i.e. we would
3634 need two sets like this available at the start of the block:
3636 (set (reg X) (reg Y))
3637 (set (reg Y) (reg X))
3639 This cannot happen since the set of (reg Y) would have killed the
3640 set of (reg X) making it unavailable at the start of this block. */
3641 while (1)
3643 rtx src;
3644 struct expr *set = lookup_set (regno, NULL_RTX);
3646 /* Find a set that is available at the start of the block
3647 which contains INSN. */
3648 while (set)
3650 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3651 break;
3652 set = next_set (regno, set);
3655 /* If no available set was found we've reached the end of the
3656 (possibly empty) copy chain. */
3657 if (set == 0)
3658 break;
3660 if (GET_CODE (set->expr) != SET)
3661 abort ();
3663 src = SET_SRC (set->expr);
3665 /* We know the set is available.
3666 Now check that SRC is ANTLOC (i.e. none of the source operands
3667 have changed since the start of the block).
3669 If the source operand changed, we may still use it for the next
3670 iteration of this loop, but we may not use it for substitutions. */
3672 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3673 set1 = set;
3675 /* If the source of the set is anything except a register, then
3676 we have reached the end of the copy chain. */
3677 if (GET_CODE (src) != REG)
3678 break;
3680 /* Follow the copy chain, i.e. start another iteration of the loop
3681 and see if we have an available copy into SRC. */
3682 regno = REGNO (src);
3685 /* SET1 holds the last set that was available and anticipatable at
3686 INSN. */
3687 return set1;
3690 /* Subroutine of cprop_insn that tries to propagate constants into
3691 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3692 that we can use for substitutions.
3693 REG_USED is the use we will try to replace, SRC is the constant we
3694 will try to substitute for it.
3695 Returns nonzero if a change was made. */
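/* For example, once (reg 100) is known to be (const_int 0), a jump

       (set (pc) (if_then_else (eq (reg 100) (const_int 0))
                               (label_ref L) (pc)))

   folds via simplify_ternary_operation into (set (pc) (label_ref L)),
   an unconditional jump, or into (set (pc) (pc)), a nop jump. */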
3697 static int
3698 cprop_jump (insn, copy, reg_used, src)
3699 rtx insn, copy;
3700 struct reg_use *reg_used;
3701 rtx src;
3703 rtx set = PATTERN (copy);
3704 rtx temp;
3706 /* Replace the register with the appropriate constant. */
3707 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3709 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3710 GET_MODE (SET_SRC (set)),
3711 GET_MODE (XEXP (SET_SRC (set), 0)),
3712 XEXP (SET_SRC (set), 0),
3713 XEXP (SET_SRC (set), 1),
3714 XEXP (SET_SRC (set), 2));
3716 /* If no simplification can be made, then try the next
3717 register. */
3718 if (temp == 0)
3719 return 0;
3721 SET_SRC (set) = temp;
3723 /* That may have changed the structure of TEMP, so
3724 force it to be rerecognized if it has not turned
3725 into a nop or unconditional jump. */
3727 INSN_CODE (copy) = -1;
3728 if ((SET_DEST (set) == pc_rtx
3729 && (SET_SRC (set) == pc_rtx
3730 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3731 || recog (PATTERN (copy), copy, NULL) >= 0)
3733 /* This has either become an unconditional jump
3734 or a nop-jump. We'd like to delete nop jumps
3735 here, but doing so confuses gcse. So we just
3736 make the replacement and let later passes
3737 sort things out. */
3738 PATTERN (insn) = set;
3739 INSN_CODE (insn) = -1;
3741 /* One less use of the label this insn used to jump to
3742 if we turned this into a NOP jump. */
3743 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3744 --LABEL_NUSES (JUMP_LABEL (insn));
3746 /* If this has turned into an unconditional jump,
3747 then put a barrier after it so that the unreachable
3748 code will be deleted. */
3749 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3750 emit_barrier_after (insn);
3752 run_jump_opt_after_gcse = 1;
3754 const_prop_count++;
3755 if (gcse_file != NULL)
3757 fprintf (gcse_file,
3758 "CONST-PROP: Replacing reg %d in insn %d with constant ",
3759 REGNO (reg_used->reg_rtx), INSN_UID (insn));
3760 print_rtl (gcse_file, src);
3761 fprintf (gcse_file, "\n");
3764 return 1;
3766 return 0;
3769 #ifdef HAVE_cc0
3771 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3772 for machines that have CC0. INSN is a single set that stores into CC0;
3773 the insn following it is a conditional jump. REG_USED is the use we will
3774 try to replace, SRC is the constant we will try to substitute for it.
3775 Returns nonzero if a change was made. */
3777 static int
3778 cprop_cc0_jump (insn, reg_used, src)
3779 rtx insn;
3780 struct reg_use *reg_used;
3781 rtx src;
3783 rtx jump = NEXT_INSN (insn);
3784 rtx copy = copy_rtx (jump);
3785 rtx set = PATTERN (copy);
3787 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3788 substitute into it. */
3789 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3790 if (! cprop_jump (jump, copy, reg_used, src))
3791 return 0;
3793 /* If we succeeded, delete the cc0 setter. */
3794 PUT_CODE (insn, NOTE);
3795 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3796 NOTE_SOURCE_FILE (insn) = 0;
3797 return 1;
3799 #endif
3801 /* Perform constant and copy propagation on INSN.
3802 The result is non-zero if a change was made. */
3804 static int
3805 cprop_insn (insn, alter_jumps)
3806 rtx insn;
3807 int alter_jumps;
3809 struct reg_use *reg_used;
3810 int changed = 0;
3811 rtx note;
3813 /* Only propagate into SETs. Note that a conditional jump is a
3814 SET with pc_rtx as the destination. */
3815 if ((GET_CODE (insn) != INSN
3816 && GET_CODE (insn) != JUMP_INSN)
3817 || GET_CODE (PATTERN (insn)) != SET)
3818 return 0;
3820 reg_use_count = 0;
3821 find_used_regs (PATTERN (insn));
3823 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3824 if (!note)
3825 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3827 /* We may win even when propagating constants into notes. */
3828 if (note)
3829 find_used_regs (XEXP (note, 0));
3831 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3832 reg_used++, reg_use_count--)
3834 unsigned int regno = REGNO (reg_used->reg_rtx);
3835 rtx pat, src;
3836 struct expr *set;
3838 /* Ignore registers created by GCSE: tables sized before GCSE ran,
3839 such as reg_set_table, have no entries for them. */
3840 if (regno >= max_gcse_regno)
3841 continue;
3843 /* If the register has already been set in this block, there's
3844 nothing we can do. */
3845 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3846 continue;
3848 /* Find an assignment that sets reg_used and is available
3849 at the start of the block. */
3850 set = find_avail_set (regno, insn);
3851 if (! set)
3852 continue;
3854 pat = set->expr;
3855 /* ??? We might be able to handle PARALLELs. Later. */
3856 if (GET_CODE (pat) != SET)
3857 abort ();
3859 src = SET_SRC (pat);
3861 /* Constant propagation. */
3862 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3863 || GET_CODE (src) == SYMBOL_REF)
3865 /* Handle normal insns first. */
3866 if (GET_CODE (insn) == INSN
3867 && try_replace_reg (reg_used->reg_rtx, src, insn))
3869 changed = 1;
3870 const_prop_count++;
3871 if (gcse_file != NULL)
3873 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3874 regno);
3875 fprintf (gcse_file, "insn %d with constant ",
3876 INSN_UID (insn));
3877 print_rtl (gcse_file, src);
3878 fprintf (gcse_file, "\n");
3881 /* The original insn setting reg_used may or may not now be
3882 deletable. We leave the deletion to flow. */
3885 /* Try to propagate a CONST_INT into a conditional jump.
3886 We're pretty specific about what we will handle in this
3887 code; we can extend this as necessary over time.
3889 Right now the insn in question must look like
3890 (set (pc) (if_then_else ...)) */
3891 else if (alter_jumps
3892 && GET_CODE (insn) == JUMP_INSN
3893 && condjump_p (insn)
3894 && ! simplejump_p (insn))
3895 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3896 #ifdef HAVE_cc0
3897 /* Similar code for machines that use a pair of CC0 setter and
3898 conditional jump insn. */
3899 else if (alter_jumps
3900 && GET_CODE (PATTERN (insn)) == SET
3901 && SET_DEST (PATTERN (insn)) == cc0_rtx
3902 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3903 && condjump_p (NEXT_INSN (insn))
3904 && ! simplejump_p (NEXT_INSN (insn)))
3905 changed |= cprop_cc0_jump (insn, reg_used, src);
3906 #endif
3908 else if (GET_CODE (src) == REG
3909 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3910 && REGNO (src) != regno)
3912 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3914 changed = 1;
3915 copy_prop_count++;
3916 if (gcse_file != NULL)
3918 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3919 regno, INSN_UID (insn));
3920 fprintf (gcse_file, " with reg %d\n", REGNO (src));
3923 /* The original insn setting reg_used may or may not now be
3924 deletable. We leave the deletion to flow. */
3925 /* FIXME: If it turns out that the insn isn't deletable,
3926 then we may have unnecessarily extended register lifetimes
3927 and made things worse. */
3932 return changed;
3935 /* Forward propagate copies. This includes copies and constants. Return
3936 non-zero if a change was made. */
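/* For example (illustrative pseudo registers): given an available set
   (set (reg 60) (const_int 7)), a later use of (reg 60) is replaced by
   (const_int 7); given (set (reg 61) (reg 58)), a later use of (reg 61)
   is replaced by (reg 58), provided neither register has been set again
   in the meantime. */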
3938 static int
3939 cprop (alter_jumps)
3940 int alter_jumps;
3942 int bb, changed;
3943 rtx insn;
3945 /* Note we start at block 1. */
3947 changed = 0;
3948 for (bb = 1; bb < n_basic_blocks; bb++)
3950 /* Reset tables used to keep track of what's still valid [since the
3951 start of the block]. */
3952 reset_opr_set_tables ();
3954 for (insn = BLOCK_HEAD (bb);
3955 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3956 insn = NEXT_INSN (insn))
3958 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3960 changed |= cprop_insn (insn, alter_jumps);
3962 /* Keep track of everything modified by this insn. */
3963 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
3964 call mark_oprs_set if we turned the insn into a NOTE. */
3965 if (GET_CODE (insn) != NOTE)
3966 mark_oprs_set (insn);
3971 if (gcse_file != NULL)
3972 fprintf (gcse_file, "\n");
3974 return changed;
3977 /* Perform one copy/constant propagation pass.
3978 F is the first insn in the function.
3979 PASS is the pass count. */
3981 static int
3982 one_cprop_pass (pass, alter_jumps)
3983 int pass;
3984 int alter_jumps;
3986 int changed = 0;
3988 const_prop_count = 0;
3989 copy_prop_count = 0;
3991 alloc_set_hash_table (max_cuid);
3992 compute_set_hash_table ();
3993 if (gcse_file)
3994 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3995 n_sets);
3996 if (n_sets > 0)
3998 alloc_cprop_mem (n_basic_blocks, n_sets);
3999 compute_cprop_data ();
4000 changed = cprop (alter_jumps);
4001 free_cprop_mem ();
4004 free_set_hash_table ();
4006 if (gcse_file)
4008 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4009 current_function_name, pass, bytes_used);
4010 fprintf (gcse_file, "%d const props, %d copy props\n\n",
4011 const_prop_count, copy_prop_count);
4014 return changed;
4017 /* Compute PRE+LCM working variables. */
4019 /* Local properties of expressions. */
4020 /* Nonzero for expressions that are transparent in the block. */
4021 static sbitmap *transp;
4023 /* Nonzero for expressions that are transparent at the end of the block.
4024 This is only zero for expressions killed by an abnormal critical
4025 edge created by a call. */
4026 static sbitmap *transpout;
4028 /* Nonzero for expressions that are computed (available) in the block. */
4029 static sbitmap *comp;
4031 /* Nonzero for expressions that are locally anticipatable in the block. */
4032 static sbitmap *antloc;
4034 /* Nonzero for expressions where this block is an optimal computation
4035 point. */
4036 static sbitmap *pre_optimal;
4038 /* Nonzero for expressions which are redundant in a particular block. */
4039 static sbitmap *pre_redundant;
4041 /* Nonzero for expressions which should be inserted on a specific edge. */
4042 static sbitmap *pre_insert_map;
4044 /* Nonzero for expressions which should be deleted in a specific block. */
4045 static sbitmap *pre_delete_map;
4047 /* Contains the edge_list returned by pre_edge_lcm. */
4048 static struct edge_list *edge_list;
4050 static sbitmap *temp_bitmap;
4052 /* Redundant insns. */
4053 static sbitmap pre_redundant_insns;
4055 /* Allocate vars used for PRE analysis. */
4057 static void
4058 alloc_pre_mem (n_blocks, n_exprs)
4059 int n_blocks, n_exprs;
4061 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4062 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4063 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4064 temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4066 pre_optimal = NULL;
4067 pre_redundant = NULL;
4068 pre_insert_map = NULL;
4069 pre_delete_map = NULL;
4070 ae_in = NULL;
4071 ae_out = NULL;
4072 u_bitmap = NULL;
4073 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4074 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4076 /* pre_insert_map and pre_delete_map are allocated later. */
4079 /* Free vars used for PRE analysis. */
4081 static void
4082 free_pre_mem ()
4084 free (transp);
4085 free (comp);
4086 free (antloc);
4087 free (temp_bitmap);
4089 if (pre_optimal)
4090 free (pre_optimal);
4091 if (pre_redundant)
4092 free (pre_redundant);
4093 if (pre_insert_map)
4094 free (pre_insert_map);
4095 if (pre_delete_map)
4096 free (pre_delete_map);
4097 if (transpout)
4098 free (transpout);
4100 if (ae_in)
4101 free (ae_in);
4102 if (ae_out)
4103 free (ae_out);
4104 if (ae_kill)
4105 free (ae_kill);
4106 if (u_bitmap)
4107 free (u_bitmap);
4109 transp = comp = antloc = NULL;
4110 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4111 transpout = ae_in = ae_out = ae_kill = NULL;
4112 u_bitmap = NULL;
4116 /* Top level routine to do the dataflow analysis needed by PRE. */
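/* Roughly: TRANSP, COMP and ANTLOC are the local properties, and
   pre_edge_lcm solves the lazy code motion dataflow equations
   (anticipatability, availability, earliest and later placement),
   yielding PRE_INSERT_MAP (expressions to insert on each edge) and
   PRE_DELETE_MAP (expressions that become redundant in each block). */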
4118 static void
4119 compute_pre_data ()
4121 compute_local_properties (transp, comp, antloc, 0);
4122 compute_transpout ();
4123 sbitmap_vector_zero (ae_kill, n_basic_blocks);
4124 compute_ae_kill (comp, ae_kill);
4125 edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4126 ae_kill, &pre_insert_map, &pre_delete_map);
4129 /* PRE utilities */
4131 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4132 block BB.
4134 VISITED is a pointer to a working buffer for tracking which BB's have
4135 been visited. It is NULL for the top-level call.
4137 We treat reaching expressions that go through blocks containing the same
4138 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4139 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4140 2 as not reaching. The intent is to improve the probability of finding
4141 only one reaching expression and to reduce register lifetimes by picking
4142 the closest such expression. */
4144 static int
4145 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4146 int occr_bb;
4147 struct expr *expr;
4148 int bb;
4149 char *visited;
4151 edge pred;
4153 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4155 int pred_bb = pred->src->index;
4157 if (pred->src == ENTRY_BLOCK_PTR
4158 /* Has this predecessor already been visited? */
4159 || visited[pred_bb])
4160 ;/* Nothing to do. */
4162 /* Does this predecessor generate this expression? */
4163 else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4165 /* Is this the occurrence we're looking for?
4166 Note that there's only one generating occurrence per block
4167 so we just need to check the block number. */
4168 if (occr_bb == pred_bb)
4169 return 1;
4171 visited[pred_bb] = 1;
4173 /* Ignore this predecessor if it kills the expression. */
4174 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4175 visited[pred_bb] = 1;
4177 /* Neither gen nor kill. */
4178 else
4180 visited[pred_bb] = 1;
4181 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4182 return 1;
4186 /* All paths have been checked. */
4187 return 0;
4190 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
4191 memory allocated for that function is returned. */
4193 static int
4194 pre_expr_reaches_here_p (occr_bb, expr, bb)
4195 int occr_bb;
4196 struct expr *expr;
4197 int bb;
4199 int rval;
4200 char *visited = (char *) xcalloc (n_basic_blocks, 1);
4202 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
4204 free (visited);
4205 return rval;
4209 /* Given an expr, generate RTL which we can insert at the end of a BB,
4210 or on an edge. Set the block number of any insns generated to
4211 the value of BB. */
4213 static rtx
4214 process_insert_insn (expr)
4215 struct expr *expr;
4217 rtx reg = expr->reaching_reg;
4218 rtx pat, copied_expr;
4219 rtx first_new_insn;
4221 start_sequence ();
4222 copied_expr = copy_rtx (expr->expr);
4223 emit_move_insn (reg, copied_expr);
4224 first_new_insn = get_insns ();
4225 pat = gen_sequence ();
4226 end_sequence ();
4228 return pat;
4231 /* Add EXPR to the end of basic block BB.
4233 This is used by both PRE and code hoisting.
4235 For PRE, we want to verify that the expr is either transparent
4236 or locally anticipatable in the target block. This check makes
4237 no sense for code hoisting. */
4239 static void
4240 insert_insn_end_bb (expr, bb, pre)
4241 struct expr *expr;
4242 int bb;
4243 int pre;
4245 rtx insn = BLOCK_END (bb);
4246 rtx new_insn;
4247 rtx reg = expr->reaching_reg;
4248 int regno = REGNO (reg);
4249 rtx pat;
4250 int i;
4252 pat = process_insert_insn (expr);
4254 /* If the last insn is a jump, insert EXPR in front [taking care to
4255 handle cc0, etc. properly]. */
4257 if (GET_CODE (insn) == JUMP_INSN)
4259 #ifdef HAVE_cc0
4260 rtx note;
4261 #endif
4263 /* If this is a jump table, then we can't insert stuff here. Since
4264 we know the previous real insn must be the tablejump, we insert
4265 the new instruction just before the tablejump. */
4266 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4267 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4268 insn = prev_real_insn (insn);
4270 #ifdef HAVE_cc0
4271 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4272 if cc0 isn't set. */
4273 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4274 if (note)
4275 insn = XEXP (note, 0);
4276 else
4278 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4279 if (maybe_cc0_setter
4280 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4281 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4282 insn = maybe_cc0_setter;
4284 #endif
4285 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4286 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4289 /* Likewise if the last insn is a call, as will happen in the presence
4290 of exception handling. */
4291 else if (GET_CODE (insn) == CALL_INSN)
4293 HARD_REG_SET parm_regs;
4294 int nparm_regs;
4295 rtx p;
4297 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4298 we search backward and place the instructions before the first
4299 parameter is loaded. Do this for all targets for consistency and on
4300 the presumption that we'll get better code elsewhere as well.
4302 It should always be the case that we can put these instructions
4303 anywhere in the basic block when performing PRE optimizations.
4304 Check this. */
4306 if (pre
4307 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4308 && !TEST_BIT (transp[bb], expr->bitmap_index))
4309 abort ();
4311 /* Since different machines initialize their parameter registers
4312 in different orders, assume nothing. Collect the set of all
4313 parameter registers. */
4314 CLEAR_HARD_REG_SET (parm_regs);
4315 nparm_regs = 0;
4316 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4317 if (GET_CODE (XEXP (p, 0)) == USE
4318 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4320 if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4321 abort ();
4323 SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4324 nparm_regs++;
4327 /* Search backward for the first set of a register in this set. */
4328 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4330 insn = PREV_INSN (insn);
4331 p = single_set (insn);
4332 if (p && GET_CODE (SET_DEST (p)) == REG
4333 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4334 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4336 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4337 nparm_regs--;
4341 /* If we found all the parameter loads, then we want to insert
4342 before the first parameter load.
4344 If we did not find all the parameter loads, then we might have
4345 stopped on the head of the block, which could be a CODE_LABEL.
4346 If we inserted before the CODE_LABEL, then we would be putting
4347 the insn in the wrong basic block. In that case, put the insn
4348 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4349 if (GET_CODE (insn) == CODE_LABEL)
4350 insn = NEXT_INSN (insn);
4351 else if (GET_CODE (insn) == NOTE
4352 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4353 insn = NEXT_INSN (insn);
4355 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4357 else
4359 new_insn = emit_insn_after (pat, insn);
4360 BLOCK_END (bb) = new_insn;
4363 /* Keep block number table up to date.
4364 Note that PAT could be a multi-insn sequence; if so, each insn
4365 in the sequence must be handled. */
4366 if (GET_CODE (pat) == SEQUENCE)
4368 for (i = 0; i < XVECLEN (pat, 0); i++)
4370 rtx insn = XVECEXP (pat, 0, i);
4372 set_block_num (insn, bb);
4373 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4374 add_label_notes (PATTERN (insn), new_insn);
4376 note_stores (PATTERN (insn), record_set_info, insn);
4379 else
4381 add_label_notes (SET_SRC (pat), new_insn);
4382 set_block_num (new_insn, bb);
4384 /* Keep register set table up to date. */
4385 record_one_set (regno, new_insn);
4388 gcse_create_count++;
4390 if (gcse_file)
4392 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4393 bb, INSN_UID (new_insn));
4394 fprintf (gcse_file, "copying expression %d to reg %d\n",
4395 expr->bitmap_index, regno);
4399 /* Insert partially redundant expressions on edges in the CFG to make
4400 the expressions fully redundant. */
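/* For example, suppose "x + y" is computed in blocks 2 and 4, and
   block 4 is reached both from block 2 and from a block 3 that does
   not compute it. Inserting "x + y" on the edge 3->4 makes the
   occurrence in block 4 fully redundant; pre_delete has already
   replaced that occurrence with a copy from the reaching register. */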
4402 static int
4403 pre_edge_insert (edge_list, index_map)
4404 struct edge_list *edge_list;
4405 struct expr **index_map;
4407 int e, i, j, num_edges, set_size, did_insert = 0;
4408 sbitmap *inserted;
4410 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4411 if it reaches any of the deleted expressions. */
4413 set_size = pre_insert_map[0]->size;
4414 num_edges = NUM_EDGES (edge_list);
4415 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4416 sbitmap_vector_zero (inserted, num_edges);
4418 for (e = 0; e < num_edges; e++)
4420 int indx;
4421 basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4422 int bb = pred->index;
4424 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4426 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4428 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4429 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4431 struct expr *expr = index_map[j];
4432 struct occr *occr;
4434 /* Now look at each deleted occurrence of this expression. */
4435 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4437 if (! occr->deleted_p)
4438 continue;
4440 /* Insert this expression on this edge if it would
4441 reach the deleted occurrence in BB. */
4442 if (!TEST_BIT (inserted[e], j))
4444 rtx insn;
4445 edge eg = INDEX_EDGE (edge_list, e);
4447 /* We can't insert anything on an abnormal and
4448 critical edge, so we insert the insn at the end of
4449 the previous block. There are several alternatives
4450 detailed in Morgan's book, p. 277 (sec. 10.5), for
4451 handling this situation. This one is easiest for
4452 now. */
4454 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4455 insert_insn_end_bb (index_map[j], bb, 0);
4456 else
4458 insn = process_insert_insn (index_map[j]);
4459 insert_insn_on_edge (insn, eg);
4462 if (gcse_file)
4464 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4465 bb,
4466 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4467 fprintf (gcse_file, "copy expression %d\n",
4468 expr->bitmap_index);
4471 SET_BIT (inserted[e], j);
4472 did_insert = 1;
4473 gcse_create_count++;
4480 free (inserted);
4481 return did_insert;
4484 /* Copy the result of INSN into EXPR's reaching register; the expression number is used only for the dump file. */
4486 static void
4487 pre_insert_copy_insn (expr, insn)
4488 struct expr *expr;
4489 rtx insn;
4491 rtx reg = expr->reaching_reg;
4492 int regno = REGNO (reg);
4493 int indx = expr->bitmap_index;
4494 rtx set = single_set (insn);
4495 rtx new_insn;
4496 int bb = BLOCK_NUM (insn);
4498 if (!set)
4499 abort ();
4501 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4502 insn);
4504 /* Keep block number table up to date. */
4505 set_block_num (new_insn, bb);
4507 /* Keep register set table up to date. */
4508 record_one_set (regno, new_insn);
4509 if (insn == BLOCK_END (bb))
4510 BLOCK_END (bb) = new_insn;
4512 gcse_create_count++;
4514 if (gcse_file)
4515 fprintf (gcse_file,
4516 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4517 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4518 INSN_UID (insn), regno);
4521 /* Copy available expressions that reach the redundant expression
4522 to `reaching_reg'. */
4524 static void
4525 pre_insert_copies ()
4527 unsigned int i;
4528 struct expr *expr;
4529 struct occr *occr;
4530 struct occr *avail;
4532 /* For each available expression in the table, copy the result to
4533 `reaching_reg' if the expression reaches a deleted one.
4535 ??? The current algorithm is rather brute force.
4536 Need to do some profiling. */
4538 for (i = 0; i < expr_hash_table_size; i++)
4539 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4541 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4542 we don't want to insert a copy here because the expression may not
4543 really be redundant. So only insert an insn if the expression was
4544 deleted. This test also avoids further processing if the
4545 expression wasn't deleted anywhere. */
4546 if (expr->reaching_reg == NULL)
4547 continue;
4549 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4551 if (! occr->deleted_p)
4552 continue;
4554 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4556 rtx insn = avail->insn;
4558 /* No need to handle this one if handled already. */
4559 if (avail->copied_p)
4560 continue;
4562 /* Don't handle this one if it's a redundant one. */
4563 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4564 continue;
4566 /* Or if the expression doesn't reach the deleted one. */
4567 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4568 BLOCK_NUM (occr->insn)))
4569 continue;
4571 /* Copy the result of avail to reaching_reg. */
4572 pre_insert_copy_insn (expr, insn);
4573 avail->copied_p = 1;
4579 /* Delete redundant computations.
4580 Deletion is done by changing the insn to copy the `reaching_reg' of
4581 the expression into the result of the SET. It is left to later passes
4582 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4584 Returns non-zero if a change is made. */
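/* For example (illustrative register numbers), a redundant occurrence

       (set (reg 65) (plus:SI (reg 60) (reg 61)))

   is rewritten as

       (set (reg 65) (reg 120))

   where (reg 120) is the expression's `reaching_reg'. */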
4586 static int
4587 pre_delete ()
4589 unsigned int i;
4590 int bb, changed;
4591 struct expr *expr;
4592 struct occr *occr;
4594 /* Compute the expressions which are redundant and need to be replaced by
4595 copies from the reaching reg to the target reg. */
4596 for (bb = 0; bb < n_basic_blocks; bb++)
4597 sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4599 changed = 0;
4600 for (i = 0; i < expr_hash_table_size; i++)
4601 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4603 int indx = expr->bitmap_index;
4605 /* We only need to search antic_occr since we require
4606 ANTLOC != 0. */
4608 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4610 rtx insn = occr->insn;
4611 rtx set;
4612 int bb = BLOCK_NUM (insn);
4614 if (TEST_BIT (temp_bitmap[bb], indx))
4616 set = single_set (insn);
4617 if (! set)
4618 abort ();
4620 /* Create a pseudo-reg to store the result of reaching
4621 expressions into. Get the mode for the new pseudo from
4622 the mode of the original destination pseudo. */
4623 if (expr->reaching_reg == NULL)
4624 expr->reaching_reg
4625 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4627 /* In theory this should never fail since we're creating
4628 a reg->reg copy.
4630 However, on the x86 some of the movXX patterns actually
4631 contain clobbers of scratch regs. This may cause the
4632 insn created by validate_change to not match any pattern
4633 and thus cause validate_change to fail. */
4634 if (validate_change (insn, &SET_SRC (set),
4635 expr->reaching_reg, 0))
4637 occr->deleted_p = 1;
4638 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4639 changed = 1;
4640 gcse_subst_count++;
4643 if (gcse_file)
4645 fprintf (gcse_file,
4646 "PRE: redundant insn %d (expression %d) in ",
4647 INSN_UID (insn), indx);
4648 fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4649 bb, REGNO (expr->reaching_reg));
4655 return changed;
4658 /* Perform GCSE optimizations using PRE.
4659 This is called by one_pre_gcse_pass after all the dataflow analysis
4660 has been done.
4662 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
4663 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4664 Compiler Design and Implementation.
4666 ??? A new pseudo reg is created to hold the reaching expression. The nice
4667 thing about the classical approach is that it would try to use an existing
4668 reg. If the register can't be adequately optimized [i.e. we introduce
4669 reload problems], one could add a pass here to propagate the new register
4670 through the block.
4672 ??? We don't handle single sets in PARALLELs because we're [currently] not
4673 able to copy the rest of the parallel when we insert copies to create full
4674 redundancies from partial redundancies. However, there's no reason why we
4675 can't handle PARALLELs in the cases where there are no partial
4676 redundancies. */
4678 static int
4679 pre_gcse ()
4681 unsigned int i;
4682 int did_insert, changed;
4683 struct expr **index_map;
4684 struct expr *expr;
4686 /* Compute a mapping from expression number (`bitmap_index') to
4687 hash table entry. */
4689 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4690 for (i = 0; i < expr_hash_table_size; i++)
4691 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4692 index_map[expr->bitmap_index] = expr;
4694 /* Reset bitmap used to track which insns are redundant. */
4695 pre_redundant_insns = sbitmap_alloc (max_cuid);
4696 sbitmap_zero (pre_redundant_insns);
4698 /* Delete the redundant insns first so that
4699 - we know what register to use for the new insns and for the other
4700 ones with reaching expressions
4701 - we know which insns are redundant when we go to create copies */
4703 changed = pre_delete ();
4705 did_insert = pre_edge_insert (edge_list, index_map);
4707 /* In other places with reaching expressions, copy the expression to the
4708 specially allocated pseudo-reg that reaches the redundant expr. */
4709 pre_insert_copies ();
4710 if (did_insert)
4712 commit_edge_insertions ();
4713 changed = 1;
4716 free (index_map);
4717 free (pre_redundant_insns);
4718 return changed;
4721 /* Top level routine to perform one PRE GCSE pass.
4723 Return non-zero if a change was made. */
4725 static int
4726 one_pre_gcse_pass (pass)
4727 int pass;
4729 int changed = 0;
4731 gcse_subst_count = 0;
4732 gcse_create_count = 0;
4734 alloc_expr_hash_table (max_cuid);
4735 add_noreturn_fake_exit_edges ();
4736 compute_expr_hash_table ();
4737 if (gcse_file)
4738 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4739 expr_hash_table_size, n_exprs);
4741 if (n_exprs > 0)
4743 alloc_pre_mem (n_basic_blocks, n_exprs);
4744 compute_pre_data ();
4745 changed |= pre_gcse ();
4746 free_edge_list (edge_list);
4747 free_pre_mem ();
4750 remove_fake_edges ();
4751 free_expr_hash_table ();
4753 if (gcse_file)
4755 fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4756 current_function_name, pass, bytes_used);
4757 fprintf (gcse_file, "%d substs, %d insns created\n",
4758 gcse_subst_count, gcse_create_count);
4761 return changed;
4764 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4765 We have to add REG_LABEL notes, because the following loop optimization
4766 pass requires them. */
4768 /* ??? This is very similar to the loop.c add_label_notes function. We
4769 could probably share code here. */
4771 /* ??? If there was a jump optimization pass after gcse and before loop,
4772 then we would not need to do this here, because jump would add the
4773 necessary REG_LABEL notes. */
4775 static void
4776 add_label_notes (x, insn)
4777 rtx x;
4778 rtx insn;
4780 enum rtx_code code = GET_CODE (x);
4781 int i, j;
4782 const char *fmt;
4784 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4786 /* This code used to ignore labels that referred to dispatch tables to
4787 avoid flow generating (slightly) worse code.
4789 We no longer ignore such label references (see LABEL_REF handling in
4790 mark_jump_label for additional information). */
4792 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4793 REG_NOTES (insn));
4794 return;
4797 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4799 if (fmt[i] == 'e')
4800 add_label_notes (XEXP (x, i), insn);
4801 else if (fmt[i] == 'E')
4802 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4803 add_label_notes (XVECEXP (x, i, j), insn);
4807 /* Compute transparent outgoing information for each block.
4809 An expression is transparent to an edge unless it is killed by
4810 the edge itself. This can only happen with abnormal control flow,
4811 when the edge is traversed through a call. This happens with
4812 non-local labels and exceptions.
4814 This would not be necessary if we split the edge. While this is
4815 normally impossible for abnormal critical edges, with some effort
4816 it should be possible with exception handling, since we still have
4817 control over which handler should be invoked. But due to increased
4818 EH table sizes, this may not be worthwhile. */
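/* For example, if a block ends in a call that may take a nonlocal goto
   or throw, a (mem ...) expression available at the call may be
   clobbered by the callee before control reaches the abnormal
   successor, so the expression is not transparent out of the block. */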
4820 static void
4821 compute_transpout ()
4823 int bb;
4824 unsigned int i;
4825 struct expr *expr;
4827 sbitmap_vector_ones (transpout, n_basic_blocks);
4829 for (bb = 0; bb < n_basic_blocks; ++bb)
4831 /* Note that flow inserted a nop at the end of basic blocks that
4832 end in call instructions for reasons other than abnormal
4833 control flow. */
4834 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4835 continue;
4837 for (i = 0; i < expr_hash_table_size; i++)
4838 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4839 if (GET_CODE (expr->expr) == MEM)
4841 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4842 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4843 continue;
4845 /* ??? Optimally, we would use interprocedural alias
4846 analysis to determine if this mem is actually killed
4847 by this call. */
4848 RESET_BIT (transpout[bb], expr->bitmap_index);
4853 /* Removal of useless null pointer checks */
4855 /* Called via note_stores. X is set by SETTER. If X is a register we must
4856 invalidate nonnull_local and set nonnull_killed. DATA is really a
4857 `null_pointer_info *'.
4859 We ignore hard registers. */
4861 static void
4862 invalidate_nonnull_info (x, setter, data)
4863 rtx x;
4864 rtx setter ATTRIBUTE_UNUSED;
4865 void *data;
4867 unsigned int regno;
4868 struct null_pointer_info *npi = (struct null_pointer_info *) data;
4870 while (GET_CODE (x) == SUBREG)
4871 x = SUBREG_REG (x);
4873 /* Ignore anything that is not a register or is a hard register. */
4874 if (GET_CODE (x) != REG
4875 || REGNO (x) < npi->min_reg
4876 || REGNO (x) >= npi->max_reg)
4877 return;
4879 regno = REGNO (x) - npi->min_reg;
4881 RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4882 SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4885 /* Do null-pointer check elimination for the registers indicated in
4886 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4887 they are not our responsibility to free. */
4889 static void
4890 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4891 unsigned int *block_reg;
4892 sbitmap *nonnull_avin;
4893 sbitmap *nonnull_avout;
4894 struct null_pointer_info *npi;
4896 int bb;
4897 int current_block;
4898 sbitmap *nonnull_local = npi->nonnull_local;
4899 sbitmap *nonnull_killed = npi->nonnull_killed;
4901 /* Compute local properties, nonnull and killed. A register will have
4902 the nonnull property if at the end of the current block its value is
4903 known to be nonnull. The killed property indicates that somewhere in
4904 the block any information we had about the register is killed.
4906 Note that a register can have both properties in a single block. That
4907 indicates that it's killed, then later in the block a new value is
4908 computed. */
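/* For example, within a single block (illustrative register numbers)

       (set (reg 70) (reg 71))          kills any nonnull info for reg 70
       (set (reg 72) (mem (reg 70)))    reg 70 is known nonnull from here

   leaves both NONNULL_KILLED and NONNULL_LOCAL set for reg 70. */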
4909 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4910 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4912 for (current_block = 0; current_block < n_basic_blocks; current_block++)
4914 rtx insn, stop_insn;
4916 /* Set the current block for invalidate_nonnull_info. */
4917 npi->current_block = current_block;
4919 /* Scan each insn in the basic block looking for memory references and
4920 register sets. */
4921 stop_insn = NEXT_INSN (BLOCK_END (current_block));
4922 for (insn = BLOCK_HEAD (current_block);
4923 insn != stop_insn;
4924 insn = NEXT_INSN (insn))
4926 rtx set;
4927 rtx reg;
4929 /* Ignore anything that is not a normal insn. */
4930 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4931 continue;
4933 /* Basically ignore anything that is not a simple SET. We do have
4934 to make sure to invalidate nonnull_local and set nonnull_killed
4935 for such insns though. */
4936 set = single_set (insn);
4937 if (!set)
4939 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4940 continue;
4943 /* See if we've got a usable memory load. We handle it first
4944 in case it uses its address register as a dest (which kills
4945 the nonnull property). */
4946 if (GET_CODE (SET_SRC (set)) == MEM
4947 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4948 && REGNO (reg) >= npi->min_reg
4949 && REGNO (reg) < npi->max_reg)
4950 SET_BIT (nonnull_local[current_block],
4951 REGNO (reg) - npi->min_reg);
4953 /* Now invalidate stuff clobbered by this insn. */
4954 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4956 /* And handle stores; we do these last since any sets in INSN
4957 cannot kill the nonnull property if it is derived from a MEM
4958 appearing in a SET_DEST. */
4959 if (GET_CODE (SET_DEST (set)) == MEM
4960 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4961 && REGNO (reg) >= npi->min_reg
4962 && REGNO (reg) < npi->max_reg)
4963 SET_BIT (nonnull_local[current_block],
4964 REGNO (reg) - npi->min_reg);
4968 /* Now compute global properties based on the local properties. This
4969 is a classic global availability algorithm. */
4970 compute_available (nonnull_local, nonnull_killed,
4971 nonnull_avout, nonnull_avin);
4973 /* Now look at each bb and see if it ends with a compare of a value
4974 against zero. */
4975 for (bb = 0; bb < n_basic_blocks; bb++)
4977 rtx last_insn = BLOCK_END (bb);
4978 rtx condition, earliest;
4979 int compare_and_branch;
4981 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4982 since BLOCK_REG[BB] is zero if this block did not end with a
4983 comparison against zero, this condition works. */
4984 if (block_reg[bb] < npi->min_reg
4985 || block_reg[bb] >= npi->max_reg)
4986 continue;
4988 /* LAST_INSN is a conditional jump. Get its condition. */
4989 condition = get_condition (last_insn, &earliest);
4991 /* If we can't determine the condition then skip. */
4992 if (! condition)
4993 continue;
4995 /* Is the register known to have a nonzero value? */
4996 if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
4997 continue;
4999 /* Try to compute whether the compare/branch at the end of the
5000 block is one or two instructions. */
5001 if (earliest == last_insn)
5002 compare_and_branch = 1;
5003 else if (earliest == prev_nonnote_insn (last_insn))
5004 compare_and_branch = 2;
5005 else
5006 continue;
5008 /* We know the register in this comparison is nonnull at exit from
5009 this block. We can optimize this comparison. */
5010 if (GET_CODE (condition) == NE)
5012 rtx new_jump;
5014 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5015 last_insn);
5016 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5017 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5018 emit_barrier_after (new_jump);
5020 delete_insn (last_insn);
5021 if (compare_and_branch == 2)
5022 delete_insn (earliest);
5024 /* Don't check this block again. (Note that BLOCK_END is
5025 invalid here; we deleted the last instruction in the
5026 block.) */
5027 block_reg[bb] = 0;
5031 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5032 at compile time.
5034 This is conceptually similar to global constant/copy propagation and
5035 classic global CSE (it even uses the same dataflow equations as cprop).
5037 If a register is used as a memory address with the form (mem (reg)), then
5038 we know that REG cannot be zero at that point in the program. Any instruction
5039 which sets REG "kills" this property.
5041 So, if every path leading to a conditional branch has an available memory
5042 reference of that form, then we know the register can not have the value
5043 zero at the conditional branch.
5045 So we merely need to compute the local properties and propagate that data
5046 around the cfg, then optimize where possible.
5048 We run this pass two times. Once before CSE, then again after CSE. This
5049 has proven to be the most profitable approach. It is rare for new
5050 optimization opportunities of this nature to appear after the first CSE
5051 pass.
5053 This could probably be integrated with global cprop with a little work. */
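/* For example, in (hypothetical) source like

       x = p->f;        the load is (mem (reg P)), so P is nonnull here
       ...
       if (p != 0)      this test must succeed, so the conditional
         g ();          branch is replaced by an unconditional jump

   provided P is not set again on any path between the load and the
   test. */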
5055 void
5056 delete_null_pointer_checks (f)
5057 rtx f ATTRIBUTE_UNUSED;
5059 sbitmap *nonnull_avin, *nonnull_avout;
5060 unsigned int *block_reg;
5061 int bb;
5062 int reg;
5063 int regs_per_pass;
5064 int max_reg;
5065 struct null_pointer_info npi;
5067 /* If we have only a single block, then there's nothing to do. */
5068 if (n_basic_blocks <= 1)
5069 return;
5071 /* Trying to perform global optimizations on flow graphs which have
5072 a high connectivity will take a long time and is unlikely to be
5073 particularly useful.
5075 In normal circumstances a cfg should have about twice as many edges
5076 as blocks. But we do not want to punish small functions which have
5077 a couple of switch statements. So we require a relatively large number
5078 of basic blocks and the ratio of edges to blocks to be high. */
5079 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5080 return;
5082 /* We need four bitmaps, each with a bit for each register in each
5083 basic block. */
5084 max_reg = max_reg_num ();
5085 regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5087 /* Allocate bitmaps to hold local and global properties. */
5088 npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5089 npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5090 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5091 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5093 /* Go through the basic blocks, seeing whether or not each block
5094 ends with a conditional branch whose condition is a comparison
5095 against zero. Record the register compared in BLOCK_REG. */
5096 block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (unsigned int));
5097 for (bb = 0; bb < n_basic_blocks; bb++)
5099 rtx last_insn = BLOCK_END (bb);
5100 rtx condition, earliest, reg;
5102 /* We only want conditional branches. */
5103 if (GET_CODE (last_insn) != JUMP_INSN
5104 || !condjump_p (last_insn)
5105 || simplejump_p (last_insn))
5106 continue;
5108 /* LAST_INSN is a conditional jump. Get its condition. */
5109 condition = get_condition (last_insn, &earliest);
5111 /* If we were unable to get the condition, or it is not an equality
5112 comparison against zero, then there's nothing we can do. */
5113 if (!condition
5114 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5115 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5116 || (XEXP (condition, 1)
5117 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5118 continue;
5120 /* We must be checking a register against zero. */
5121 reg = XEXP (condition, 0);
5122 if (GET_CODE (reg) != REG)
5123 continue;
5125 block_reg[bb] = REGNO (reg);
5128 /* Go through the algorithm for each block of registers. */
5129 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5131 npi.min_reg = reg;
5132 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5133 delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5134 nonnull_avout, &npi);
5137 /* Free the table of registers compared at the end of every block. */
5138 free (block_reg);
5140 /* Free bitmaps. */
5141 free (npi.nonnull_local);
5142 free (npi.nonnull_killed);
5143 free (nonnull_avin);
5144 free (nonnull_avout);
5147 /* Code Hoisting variables and subroutines. */
5149 /* Very busy expressions. */
5150 static sbitmap *hoist_vbein;
5151 static sbitmap *hoist_vbeout;
5153 /* Hoistable expressions. */
5154 static sbitmap *hoist_exprs;
5156 /* Dominator bitmaps. */
5157 static sbitmap *dominators;
5159 /* ??? We could compute post dominators and run this algorithm in
5160 reverse to perform tail merging; doing so would probably be
5161 more effective than the tail merging code in jump.c.
5163 It's unclear if tail merging could be run in parallel with
5164 code hoisting. It would be nice. */
5166 /* Allocate vars used for code hoisting analysis. */
5168 static void
5169 alloc_code_hoist_mem (n_blocks, n_exprs)
5170 int n_blocks, n_exprs;
5172 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5173 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5174 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5176 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5177 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5178 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5179 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5181 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5184 /* Free vars used for code hoisting analysis. */
5186 static void
5187 free_code_hoist_mem ()
5189 free (antloc);
5190 free (transp);
5191 free (comp);
5193 free (hoist_vbein);
5194 free (hoist_vbeout);
5195 free (hoist_exprs);
5196 free (transpout);
5198 free (dominators);
5201 /* Compute the very busy expressions at entry/exit from each block.
5203 An expression is very busy if all paths from a given point
5204 compute the expression. */
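/* In dataflow terms, using the local ANTLOC and TRANSP sets:

       VBEIN(b)  = ANTLOC(b) | (VBEOUT(b) & TRANSP(b))
       VBEOUT(b) = intersection of VBEIN(s) over all successors s of b
                   (the last block's VBEOUT stays empty)

   iterated to a fixed point, scanning the blocks in reverse order. */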
5206 static void
5207 compute_code_hoist_vbeinout ()
5209 int bb, changed, passes;
5211 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5212 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5214 passes = 0;
5215 changed = 1;
5217 while (changed)
5219 changed = 0;
5221 /* We scan the blocks in reverse order to speed up
5222 the convergence. */
5223 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5225 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5226 hoist_vbeout[bb], transp[bb]);
5227 if (bb != n_basic_blocks - 1)
5228 sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5231 passes++;
5234 if (gcse_file)
5235 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5238 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5240 static void
5241 compute_code_hoist_data ()
5243 compute_local_properties (transp, comp, antloc, 0);
5244 compute_transpout ();
5245 compute_code_hoist_vbeinout ();
5246 compute_flow_dominators (dominators, NULL);
5247 if (gcse_file)
5248 fprintf (gcse_file, "\n");
5251 /* Determine if the expression identified by EXPR_INDEX would
5252 reach BB unimpaired if it were placed at the end of EXPR_BB.
5254 It's unclear exactly what Muchnick meant by "unimpaired". It seems
5255 to me that the expression must either be computed or transparent in
5256 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5257 would allow the expression to be hoisted out of loops, even if
5258 the expression wasn't a loop invariant.
5260 Contrast this to reachability for PRE where an expression is
5261 considered reachable if *any* path reaches instead of *all*
5262 paths. */
5264 static int
5265 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5266 int expr_bb;
5267 int expr_index;
5268 int bb;
5269 char *visited;
5271 edge pred;
5272 int visited_allocated_locally = 0;
5275 if (visited == NULL)
5277 visited_allocated_locally = 1;
5278 visited = (char *) xcalloc (n_basic_blocks, 1);
5281 visited[expr_bb] = 1;
5282 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5284 int pred_bb = pred->src->index;
5286 if (pred->src == ENTRY_BLOCK_PTR)
5287 break;
5288 else if (visited[pred_bb])
5289 continue;
5291 /* Does this predecessor generate this expression? */
5292 else if (TEST_BIT (comp[pred_bb], expr_index))
5293 break;
5294 else if (! TEST_BIT (transp[pred_bb], expr_index))
5295 break;
5297 /* Not killed. */
5298 else
5300 visited[pred_bb] = 1;
5301 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5302 pred_bb, visited))
5303 break;
5306 if (visited_allocated_locally)
5307 free (visited);
5309 return (pred == NULL);
5312 /* Actually perform code hoisting. */
5314 static void
5315 hoist_code ()
5317 int bb, dominated;
5318 unsigned int i;
5319 struct expr **index_map;
5320 struct expr *expr;
5322 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5324 /* Compute a mapping from expression number (`bitmap_index') to
5325 hash table entry. */
5327 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5328 for (i = 0; i < expr_hash_table_size; i++)
5329 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5330 index_map[expr->bitmap_index] = expr;
5332 /* Walk over each basic block looking for potentially hoistable
5333 expressions; nothing gets hoisted from the entry block. */
5334 for (bb = 0; bb < n_basic_blocks; bb++)
5336 int found = 0;
5337 int insn_inserted_p;
5339 /* Examine each expression that is very busy at the exit of this
5340 block. These are the potentially hoistable expressions. */
5341 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5343 int hoistable = 0;
5345 if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5347 /* We've found a potentially hoistable expression; now
5348 we look at every block BB dominates to see if it
5349 computes the expression. */
5350 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5352 /* Ignore self dominance. */
5353 if (bb == dominated
5354 || ! TEST_BIT (dominators[dominated], bb))
5355 continue;
5357 /* We've found a dominated block; now see if it computes
5358 the busy expression and whether or not moving that
5359 expression to the "beginning" of that block is safe. */
5360 if (!TEST_BIT (antloc[dominated], i))
5361 continue;
5363 /* Note if the expression would reach the dominated block
5364 unimpaired if it were placed at the end of BB.
5366 Keep track of how many times this expression is hoistable
5367 from a dominated block into BB. */
5368 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5369 hoistable++;
5372 /* If we found more than one hoistable occurrence of this
5373 expression, then note it in the bitmap of expressions to
5374 hoist. It makes no sense to hoist things which are computed
5375 in only one BB, and doing so tends to pessimize register
5376 allocation. One could increase this value to try harder
5377 to avoid any possible code expansion due to register
5378 allocation issues; however experiments have shown that
5379 the vast majority of hoistable expressions are only movable
5380 from two successors, so raising this threshold is likely
5381 to nullify any benefit we get from code hoisting. */
5382 if (hoistable > 1)
5384 SET_BIT (hoist_exprs[bb], i);
5385 found = 1;
5390 /* If we found nothing to hoist, then quit now. */
5391 if (! found)
5392 continue;
5394 /* Loop over all the hoistable expressions. */
5395 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5397 /* We want to insert the expression into BB only once, so
5398 note when we've inserted it. */
5399 insn_inserted_p = 0;
5401 /* These tests should be the same as the tests above. */
5402 if (TEST_BIT (hoist_vbeout[bb], i))
5404 /* We've found a potentially hoistable expression; now
5405 we look at every block BB dominates to see if it
5406 computes the expression. */
5407 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5409 /* Ignore self dominance. */
5410 if (bb == dominated
5411 || ! TEST_BIT (dominators[dominated], bb))
5412 continue;
5414 /* We've found a dominated block; now see if it computes
5415 the busy expression and whether or not moving that
5416 expression to the "beginning" of that block is safe. */
5417 if (!TEST_BIT (antloc[dominated], i))
5418 continue;
5420 /* The expression is computed in the dominated block and
5421 it would be safe to compute it at the start of the
5422 dominated block. Now we have to determine if the
5423 expression would reach the dominated block if it were
5424 placed at the end of BB. */
5425 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5427 struct expr *expr = index_map[i];
5428 struct occr *occr = expr->antic_occr;
5429 rtx insn;
5430 rtx set;
5432 /* Find the right occurrence of this expression. */
5433 while (occr && BLOCK_NUM (occr->insn) != dominated)
5434 occr = occr->next;
5436 /* Should never happen. */
5437 if (!occr)
5438 abort ();
5440 insn = occr->insn;
5442 set = single_set (insn);
5443 if (! set)
5444 abort ();
5446 /* Create a pseudo-reg to store the result of reaching
5447 expressions into. Get the mode for the new pseudo
5448 from the mode of the original destination pseudo. */
5449 if (expr->reaching_reg == NULL)
5450 expr->reaching_reg
5451 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5453 /* In theory this should never fail since we're creating
5454 a reg->reg copy.
5456 However, on the x86 some of the movXX patterns
5457 actually contain clobbers of scratch regs. This may
5458 cause the insn created by validate_change to not
5459 match any pattern and thus cause validate_change to
5460 fail. */
5461 if (validate_change (insn, &SET_SRC (set),
5462 expr->reaching_reg, 0))
5464 occr->deleted_p = 1;
5465 if (!insn_inserted_p)
5467 insert_insn_end_bb (index_map[i], bb, 0);
5468 insn_inserted_p = 1;
5477 free (index_map);
5480 /* Top level routine to perform one code hoisting (aka unification) pass.
5482 Return non-zero if a change was made. */
5484 static int
5485 one_code_hoisting_pass ()
5487 int changed = 0;
5489 alloc_expr_hash_table (max_cuid);
5490 compute_expr_hash_table ();
5491 if (gcse_file)
5492 dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5493 expr_hash_table_size, n_exprs);
5495 if (n_exprs > 0)
5497 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5498 compute_code_hoist_data ();
5499 hoist_code ();
5500 free_code_hoist_mem ();
5503 free_expr_hash_table ();
5505 return changed;