/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - dead store elimination
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass
*/
/* References searched while implementing this.

   Compilers Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   communications of the acm, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The power of assignment motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global code motion / global value numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/
#include "hard-reg-set.h"
#include "insn-config.h"
#include "basic-block.h"
#include "function.h"

#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free
/* Maximum number of passes to perform.  */
#define MAX_PASSES 1

/* Propagate flow information through back edges and thus enable PRE to
   move loop-invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1
/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.
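
   For instance, for source like "x = a + b" the insn pattern is roughly
   (set (reg 100) (plus (reg 101) (reg 102))), and the (plus ...) source
   is what would be entered in the expression hash table.  [Illustrative
   sketch only; the register numbers here are made up.]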

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  The macro MAX_PASSES
   can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.
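
   As a sketch of steps 3-5 on hypothetical source code:

	if (cond)
	  x = a + b;
	y = a + b;

   the computation of a + b feeding y is partially redundant: it is deleted
   and replaced with a copy from a new pseudo tmp [step 3]; "tmp = a + b" is
   inserted on the path where the expression was not available [step 4]; and
   the computation in the then-branch gets a copy of its value into tmp so
   it also reaches y [step 5].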

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much because it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   A fair bit of simplicity is created by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle things can
   be rearranged.

   Help stamp out big monolithic functions!  */
/* GCSE global vars.  */

static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */

static FILE *debug_stderr;

/* An obstack for our working variables.  */

static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */

static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */

static int can_copy_init_p;

struct reg_use {rtx reg_rtx; };
/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static unsigned int set_hash_table_size;

/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;
/* Mapping of uids to cuids.
   Only real insns get cuids.  */

static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */

#ifdef ENABLE_CHECKING
#define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
#else
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
#endif

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */

static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])

/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;

/* Maximum number of assignments for copy propagation found.  */
static int n_sets;
/* Table of registers that are modified.

   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could also be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
   advantage of recording things this way is that `reg_set_table' is fairly
   sparse with respect to pseudo regs but for hard regs could be fairly dense
   [relatively speaking].  And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */
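/* For example, if pseudo-reg 100 is set first in insn I1 and later in insn
   I2, then after compute_sets `reg_set_table[100]' is the two-element list
   I2 -> I1, since record_one_set links each new entry onto the front of the
   chain.  [Illustrative; I1 and I2 are made-up names.]  */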
typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;

static reg_set **reg_set_table;

/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* For each block, non-zero if memory is set in that block.
   This is computed during hash table computation and is used by
   expr_killed_p and compute_transp.
   ??? Handling of memory is very simple, we don't make any attempt
   to optimize things (later).
   ??? This can be computed by compute_sets since the information
   doesn't change.  */
static char *mem_set_in_block;
/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;
/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays.  i.e.
   rd_kill[block_num][cuid_num]
   ae_kill[block_num][expr_num]  */

/* For reaching defs */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* for available exprs */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
/* Objects of this type are passed around by the null-pointer check
   removal routines.  */

struct null_pointer_info
{
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  unsigned int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  unsigned int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
static void compute_can_copy	PARAMS ((void));
static char *gmalloc		PARAMS ((unsigned int));
static char *grealloc		PARAMS ((char *, unsigned int));
static char *gcse_alloc		PARAMS ((unsigned long));
static void alloc_gcse_mem	PARAMS ((rtx));
static void free_gcse_mem	PARAMS ((void));
static void alloc_reg_set_mem	PARAMS ((int));
static void free_reg_set_mem	PARAMS ((void));
static int get_bitmap_width	PARAMS ((int, int, int));
static void record_one_set	PARAMS ((int, rtx));
static void record_set_info	PARAMS ((rtx, rtx, void *));
static void compute_sets	PARAMS ((rtx));
static void hash_scan_insn	PARAMS ((rtx, int, int));
static void hash_scan_set	PARAMS ((rtx, rtx, int));
static void hash_scan_clobber	PARAMS ((rtx, rtx));
static void hash_scan_call	PARAMS ((rtx, rtx));
static int want_to_gcse_p	PARAMS ((rtx));
static int oprs_unchanged_p	PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p	PARAMS ((rtx, rtx));
static int oprs_available_p	PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
					  int, int));
static void insert_set_in_table	PARAMS ((rtx, rtx));
static unsigned int hash_expr	PARAMS ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1	PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_string_1 PARAMS ((const char *));
static unsigned int hash_set	PARAMS ((int, int));
static int expr_equiv_p		PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table	PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((unsigned int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table	PARAMS ((FILE *, const char *, struct expr **,
					 int, int));
static struct expr *lookup_expr	PARAMS ((rtx));
static struct expr *lookup_set	PARAMS ((unsigned int, rtx));
static struct expr *next_set	PARAMS ((unsigned int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p	PARAMS ((rtx, rtx));
static void mark_call		PARAMS ((rtx));
static void mark_set		PARAMS ((rtx, rtx));
static void mark_clobber	PARAMS ((rtx, rtx));
static void mark_oprs_set	PARAMS ((rtx));
static void alloc_cprop_mem	PARAMS ((int, int));
static void free_cprop_mem	PARAMS ((void));
static void compute_transp	PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout	PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
					      int));
static void compute_cprop_data	PARAMS ((void));
static void find_used_regs	PARAMS ((rtx));
static int try_replace_reg	PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump		PARAMS ((rtx, rtx, struct reg_use *, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump	PARAMS ((rtx, struct reg_use *, rtx));
#endif
static int cprop_insn		PARAMS ((rtx, int));
static int cprop		PARAMS ((int));
static int one_cprop_pass	PARAMS ((int, int));
static void alloc_pre_mem	PARAMS ((int, int));
static void free_pre_mem	PARAMS ((void));
static void compute_pre_data	PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
static void insert_insn_end_bb	PARAMS ((struct expr *, int, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies	PARAMS ((void));
static int pre_delete		PARAMS ((void));
static int pre_gcse		PARAMS ((void));
static int one_pre_gcse_pass	PARAMS ((int));
static void add_label_notes	PARAMS ((rtx, rtx));
static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem	PARAMS ((void));
static void compute_code_hoist_vbeinout	PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
static void hoist_code		PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));
static void alloc_rd_mem	PARAMS ((int, int));
static void free_rd_mem		PARAMS ((void));
static void handle_rd_kill_set	PARAMS ((rtx, int, int));
static void compute_kill_rd	PARAMS ((void));
static void compute_rd		PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem	PARAMS ((void));
static void compute_ae_gen	PARAMS ((void));
static int expr_killed_p	PARAMS ((rtx, int));
static void compute_ae_kill	PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p	PARAMS ((struct occr *, struct expr *,
					 int, int));
static rtx computing_insn	PARAMS ((struct expr *, rtx));
static int def_reaches_here_p	PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr	PARAMS ((rtx, struct expr *));
static int classic_gcse		PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((varray_type *,
						  unsigned int *,
						  sbitmap *, sbitmap *,
						  struct null_pointer_info *));
static rtx process_insert_insn	PARAMS ((struct expr *));
static int pre_edge_insert	PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
					     int, int));
static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
						 int, char *));
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (file)
    dump_flow_info (file);

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return 0;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
		 n_basic_blocks, n_edges / n_basic_blocks);
      return 0;
    }

  /* If allocating memory for the cprop bitmap would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_gcse_regno)
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d basic blocks and %d registers",
		 n_basic_blocks, max_gcse_regno);
      return 0;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_PASSES)
    {
      changed = 0;
      if (file)
	fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
	 and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
	 this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
	changed |= one_classic_gcse_pass (pass + 1);
      else
	{
	  changed |= one_pre_gcse_pass (pass + 1);
	  free_reg_set_mem ();
	  alloc_reg_set_mem (max_reg_num ());
	  compute_sets (f);
	  run_jump_opt_after_gcse = 1;
	}

      if (max_pass_bytes < bytes_used)
	max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
	 not re-use the existing allocated memory because the tables
	 will not have info for the insns or registers created by
	 partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
	 for code size -- it rarely makes programs faster, and can make
	 them bigger if we did partial redundancy elimination (when optimizing
	 for space, we use a classic gcse algorithm instead of partial
	 redundancy algorithms).  */
      if (optimize_size)
	{
	  max_gcse_regno = max_reg_num ();
	  alloc_gcse_mem (f);
	  changed |= one_code_hoisting_pass ();
	  free_gcse_mem ();

	  if (max_pass_bytes < bytes_used)
	    max_pass_bytes = bytes_used;
	}

      if (file)
	{
	  fprintf (file, "\n");
	  fflush (file);
	}

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);

  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
	       current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
	       pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  obstack_free (&gcse_obstack, NULL_PTR);
  free_reg_set_mem ();
  return run_jump_opt_after_gcse;
}
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif

  memset (can_copy_p, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy_p[i] = 0;
#else
	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
	  can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();
}
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}
/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  memset ((char *) uid_cuid, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	uid_cuid[INSN_UID (insn)] = i++;
      else
	uid_cuid[INSN_UID (insn)] = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  memset ((char *) cuid_insn, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      CUID_INSN (i++) = insn;

  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
						       max_gcse_regno);
  mem_set_in_block = (char *) gmalloc (n_basic_blocks);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  free (reg_set_in_block);
  free (mem_set_in_block);
}
/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values across the
   various blocks are combined (by following flow graph edges) to arrive at
   global values.  Conceptually, each set of equations is independent.  We
   may therefore solve all the equations in parallel, solve them one at a
   time, or pick any intermediate approach.

   When you're going to need N two-dimensional bitmaps, each X (say, the
   number of blocks) by Y (say, the number of expressions), call this
   function.  It's not important what X and Y represent; only that Y
   correspond to the things that can be done in parallel.  This function will
   return an appropriate chunking factor C; you should solve C sets of
   equations in parallel.  By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
			     / column_size);
}
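/* A worked example of the chunking computation, assuming 32-bit sbitmap
   elements (SBITMAP_ELT_BITS == 32, sizeof (SBITMAP_ELT_TYPE) == 4):
   with n == 4 bitmaps, x == 1000 blocks and y == 100000 expressions,
   column_size is 16000 bytes, and solving everything in parallel would
   need 16000 * SBITMAP_SET_SIZE (100000) == 50MB, over the 10MB budget;
   the function instead returns 32 * ((10485760 + 15999) / 16000) == 20992
   equations per chunk.  [Illustrative numbers, not from any target.]  */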
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   SETP controls which hash table to look at.  If zero, this routine looks at
   the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */
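/* As a small example of the three properties: in a block containing

       x = a + b;
       ...
       a = c;

   the expression a+b is locally anticipatable (it is computed first, with
   operands unmodified before it), is not locally available (a is modified
   after the computation), and is not transparent (the block sets one of
   its operands).  [Hypothetical pseudo-source; the actual tests are
   oprs_anticipatable_p and oprs_available_p below.]  */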
static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
	sbitmap_vector_zero (transp, n_basic_blocks);
      else
	sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp, setp);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to non-zero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to non-zero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  memset ((char *) reg_set_table, 0, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL_PTR);
}
1127 record_one_set (regno
, insn
)
1131 /* allocate a new reg_set element and link it onto the list */
1132 struct reg_set
*new_reg_info
;
1134 /* If the table isn't big enough, enlarge it. */
1135 if (regno
>= reg_set_table_size
)
1137 int new_size
= regno
+ REG_SET_TABLE_SLOP
;
1140 = (struct reg_set
**) grealloc ((char *) reg_set_table
,
1141 new_size
* sizeof (struct reg_set
*));
1142 memset ((char *) (reg_set_table
+ reg_set_table_size
), 0,
1143 (new_size
- reg_set_table_size
) * sizeof (struct reg_set
*));
1144 reg_set_table_size
= new_size
;
1147 new_reg_info
= (struct reg_set
*) obstack_alloc (®_set_obstack
,
1148 sizeof (struct reg_set
));
1149 bytes_used
+= sizeof (struct reg_set
);
1150 new_reg_info
->insn
= insn
;
1151 new_reg_info
->next
= reg_set_table
[regno
];
1152 reg_set_table
[regno
] = new_reg_info
;
/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.  See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

  for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      note_stores (PATTERN (insn), record_set_info, insn);
}
/* Hash table support.  */

/* For each register, the cuid of the first/last insn in the block to set it,
   or -1 if not set.  */
#define NEVER_SET -1
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or -1 if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.

   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;
/* Perform a quick check whether X, the source of a set, is something
   we want to consider for GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  return 1;
}
/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x;
     rtx insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
	return (reg_last_set[REGNO (x)] == NEVER_SET
		|| reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
	return (reg_first_set[REGNO (x)] == NEVER_SET
		|| reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (avail_p && mem_last_set != NEVER_SET
	  && mem_last_set >= INSN_CUID (insn))
	return 0;
      else if (! avail_p && mem_first_set != NEVER_SET
	       && mem_first_set < INSN_CUID (insn))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}
/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}

/* Hash a string.  Just add its bytes up.  */

static inline unsigned
hash_string_1 (ps)
     const char *ps;
{
  unsigned hash = 0;
  const unsigned char *p = (const unsigned char *) ps;

  if (p)
    while (*p)
      hash += *p++;

  return hash;
}
/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;

  /* Used to turn recursion into iteration.  We can't rely on GCC's
     tail-recursion elimination since we need to keep accumulating values
     in HASH.  */

  if (x == 0)
    return hash;

 repeat:
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      hash += ((unsigned int) REG << 7) + REGNO (x);
      return hash;

    case CONST_INT:
      hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
	       + (unsigned int) INTVAL (x));
      return hash;

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
	 the integers representing the constant.  */
      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
	for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
	  hash += (unsigned int) XWINT (x, i);
      else
	hash += ((unsigned int) CONST_DOUBLE_LOW (x)
		 + (unsigned int) CONST_DOUBLE_HIGH (x));
      return hash;

      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
	 differences and differences between each stage's debugging dumps.  */
      hash += (((unsigned int) LABEL_REF << 7)
	       + CODE_LABEL_NUMBER (XEXP (x, 0)));
      return hash;

    case SYMBOL_REF:
      {
	/* Don't hash on the symbol's address to avoid bootstrap differences.
	   Different hash values may cause expressions to be recorded in
	   different orders and thus different registers to be used in the
	   final assembler.  This also avoids differences in the dump files
	   between various stages.  */
	unsigned int h = 0;
	const unsigned char *p = (const unsigned char *) XSTR (x, 0);

	while (*p)
	  h += (h << 7) + *p++; /* ??? revisit */

	hash += ((unsigned int) SYMBOL_REF << 7) + h;
	return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}

      hash += (unsigned int) MEM;
      hash += MEM_ALIAS_SET (x);
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}
      else
	{
	  /* We don't want to take the filename and line into account.  */
	  hash += (unsigned) code + (unsigned) GET_MODE (x)
	    + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
	    + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
	    + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);

	  if (ASM_OPERANDS_INPUT_LENGTH (x))
	    {
	      for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
		hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
				      GET_MODE (ASM_OPERANDS_INPUT (x, i)),
				      do_not_record_p)
			 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
					  (x, i)));

	      hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
	      x = ASM_OPERANDS_INPUT (x, 0);
	      mode = GET_MODE (x);
	      goto repeat;
	    }
	  return hash;
	}

    default:
      break;
    }

  hash += (unsigned) code + (unsigned) GET_MODE (x);
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, i);
	      goto repeat;
	    }

	  hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
	  if (*do_not_record_p)
	    return 0;
	}

      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  {
	    hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
	    if (*do_not_record_p)
	      return 0;
	  }

      else if (fmt[i] == 's')
	hash += hash_string_1 (XSTR (x, i));
      else if (fmt[i] == 'i')
	hash += (unsigned int) XINT (x, i);
    }

  return hash;
}
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This simplifies the PRE copy
   propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}
/* Return non-zero if exp1 is equivalent to exp2.
   ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */

static int
expr_equiv_p (x, y)
     rtx x, y;
{
  register int i, j;
  register enum rtx_code code;
  register const char *fmt;

  if (x == y)
    return 1;

  if (x == 0 || y == 0)
    return x == y;

  code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

    case MEM:
      /* Can't merge two expressions in different alias sets, since we can
	 decide that the expression is transparent in a block when it isn't,
	 due to it being set with the different alias set.  */
      if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
	return 0;
      break;

      /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
      return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
	       && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
	      || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
		  && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));

    case ASM_OPERANDS:
      /* We don't use the generic code below because we want to
	 disregard filename and line numbers.  */

      /* A volatile asm isn't equivalent to any other.  */
      if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
	return 0;

      if (GET_MODE (x) != GET_MODE (y)
	  || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
	  || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
		     ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
	  || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
	  || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
	return 0;

      if (ASM_OPERANDS_INPUT_LENGTH (x))
	{
	  for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
	    if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
				ASM_OPERANDS_INPUT (y, i))
		|| strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
			   ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
	      return 0;
	}

      return 1;

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole thing.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
	{
	case 'e':
	  if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
	    return 0;
	  break;

	case 'E':
	  if (XVECLEN (x, i) != XVECLEN (y, i))
	    return 0;
	  for (j = 0; j < XVECLEN (x, i); j++)
	    if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
	      return 0;
	  break;

	case 's':
	  if (strcmp (XSTR (x, i), XSTR (y, i)))
	    return 0;
	  break;

	case 'i':
	  if (XINT (x, i) != XINT (y, i))
	    return 0;
	  break;

	case 'w':
	  if (XWINT (x, i) != XWINT (y, i))
	    return 0;
	  break;

	case '0':
	  break;

	default:
	  abort ();
	}
    }

  return 1;
}
/* Insert expression X in INSN in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is non-zero if X is an anticipatable expression.
   AVAIL_P is non-zero if X is an available expression.  */

static void
insert_expr_in_table (x, mode, insn, antic_p, avail_p)
     rtx x;
     enum machine_mode mode;
     rtx insn;
     int antic_p, avail_p;
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;
  struct occr *last_occr = NULL;

  hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = expr_hash_table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (expr_hash_table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	expr_hash_table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = n_exprs++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* Search for another occurrence in the same basic block.  */
      while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
	{
	  /* If an occurrence isn't found, save a pointer to the end of
	     the list.  */
	  last_occr = antic_occr;
	  antic_occr = antic_occr->next;
	}

      if (antic_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer the currently recorded one.  We want the first one in the
	   block and the block is scanned from start to end.  */
	; /* nothing to do */
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
	  bytes_used += sizeof (struct occr);
	  /* First occurrence of this expression in any block?  */
	  if (cur_expr->antic_occr == NULL)
	    cur_expr->antic_occr = antic_occr;
	  else
	    last_occr->next = antic_occr;

	  antic_occr->insn = insn;
	  antic_occr->next = NULL;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      /* Search for another occurrence in the same basic block.  */
      while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
	{
	  /* If an occurrence isn't found, save a pointer to the end of
	     the list.  */
	  last_occr = avail_occr;
	  avail_occr = avail_occr->next;
	}

      if (avail_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer this occurrence to the currently recorded one.  We want
	   the last one in the block and the block is scanned from start
	   to end.  */
	avail_occr->insn = insn;
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
	  bytes_used += sizeof (struct occr);

	  /* First occurrence of this expression in any block?  */
	  if (cur_expr->avail_occr == NULL)
	    cur_expr->avail_occr = avail_occr;
	  else
	    last_occr->next = avail_occr;

	  avail_occr->insn = insn;
	  avail_occr->next = NULL;
	}
    }
}
/* Insert pattern X in INSN in the hash table.
   X is a SET of a reg to either another reg or a constant.
   If it is already present, record it as the last occurrence in INSN's
   basic block.  */

static void
insert_set_in_table (x, insn)
     rtx x;
     rtx insn;
{
  int found;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *cur_occr, *last_occr = NULL;

  if (GET_CODE (x) != SET
      || GET_CODE (SET_DEST (x)) != REG)
    abort ();

  hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);

  cur_expr = set_hash_table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (set_hash_table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	set_hash_table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.
	 We must copy X because it can be modified when copy propagation is
	 performed on its operands.  */
      /* ??? Should this go in a different obstack?  */
      cur_expr->expr = copy_rtx (x);
      cur_expr->bitmap_index = n_sets++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence.  */
  cur_occr = cur_expr->avail_occr;

  /* Search for another occurrence in the same basic block.  */
  while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
    {
      /* If an occurrence isn't found, save a pointer to the end of
	 the list.  */
      last_occr = cur_occr;
      cur_occr = cur_occr->next;
    }

  if (cur_occr)
    /* Found another instance of the expression in the same basic block.
       Prefer this occurrence to the currently recorded one.  We want the
       last one in the block and the block is scanned from start to end.  */
    cur_occr->insn = insn;
  else
    {
      /* First occurrence of this expression in this basic block.  */
      cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
      bytes_used += sizeof (struct occr);

      /* First occurrence of this expression in any block?  */
      if (cur_expr->avail_occr == NULL)
	cur_expr->avail_occr = cur_occr;
      else
	last_occr->next = cur_occr;

      cur_occr->insn = insn;
      cur_occr->next = NULL;
    }
}
/* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
   non-zero, this is for the assignment hash table, otherwise it is for the
   expression hash table.  */

static void
hash_scan_set (pat, insn, set_p)
     rtx pat, insn;
     int set_p;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);

  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn);

  if (GET_CODE (dest) == REG)
    {
      int regno = REGNO (dest);
      rtx tmp;

      /* Only record sets of pseudo-regs in the hash table.  */
      if (! set_p
	  && regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p[GET_MODE (dest)]
	  /* Is SET_SRC something we want to gcse?  */
	  && want_to_gcse_p (src))
	{
	  /* An expression is not anticipatable if its operands are
	     modified before this insn.  */
	  int antic_p = oprs_anticipatable_p (src, insn);
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  */
	  int avail_p = oprs_available_p (src, insn);

	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
	}

      /* Record sets for constant/copy propagation.  */
      else if (set_p
	       && regno >= FIRST_PSEUDO_REGISTER
	       && ((GET_CODE (src) == REG
		    && REGNO (src) >= FIRST_PSEUDO_REGISTER
		    && can_copy_p[GET_MODE (dest)])
		   || GET_CODE (src) == CONST_INT
		   || GET_CODE (src) == SYMBOL_REF
		   || GET_CODE (src) == CONST_DOUBLE)
	       /* A copy is not available if its src or dest is subsequently
		  modified.  Here we want to search from INSN+1 on, but
		  oprs_available_p searches from INSN on.  */
	       && (insn == BLOCK_END (BLOCK_NUM (insn))
		   || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
		       && oprs_available_p (pat, tmp))))
	insert_set_in_table (pat, insn);
    }
}
static void
hash_scan_clobber (x, insn)
     rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
{
  /* Currently nothing to do.  */
}

static void
hash_scan_call (x, insn)
     rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
{
  /* Currently nothing to do.  */
}
/* Process INSN and add hash table entries as appropriate.

   Only available expressions that set a single pseudo-reg are recorded.

   Single sets in a PARALLEL could be handled, but it's an extra complication
   that isn't dealt with right now.  The trick is handling the CLOBBERs that
   are also in the PARALLEL.  Later.

   If SET_P is non-zero, this is for the assignment hash table,
   otherwise it is for the expression hash table.
   If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should
   not record any expressions.  */

static void
hash_scan_insn (insn, set_p, in_libcall_block)
     rtx insn;
     int set_p;
     int in_libcall_block;
{
  rtx pat = PATTERN (insn);
  int i;

  /* Pick out the sets of INSN and for other forms of instructions record
     what's been modified.  */

  if (GET_CODE (pat) == SET && ! in_libcall_block)
    {
      /* Ignore obvious no-ops.  */
      if (SET_SRC (pat) != SET_DEST (pat))
	hash_scan_set (pat, insn, set_p);
    }
  else if (GET_CODE (pat) == PARALLEL)
    for (i = 0; i < XVECLEN (pat, 0); i++)
      {
	rtx x = XVECEXP (pat, 0, i);

	if (GET_CODE (x) == SET)
	  {
	    if (GET_CODE (SET_SRC (x)) == CALL)
	      hash_scan_call (SET_SRC (x), insn);
	  }
	else if (GET_CODE (x) == CLOBBER)
	  hash_scan_clobber (x, insn);
	else if (GET_CODE (x) == CALL)
	  hash_scan_call (x, insn);
      }
  else if (GET_CODE (pat) == CLOBBER)
    hash_scan_clobber (pat, insn);
  else if (GET_CODE (pat) == CALL)
    hash_scan_call (pat, insn);
}
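/* Illustrative note (added commentary, not from the original): given a
   hypothetical PARALLEL such as

     (parallel [(set (reg:SI 117) (plus:SI (reg:SI 118) (reg:SI 119)))
		(clobber (reg:CC 17))])

   only the CLOBBER and CALL elements are scanned above; the SET inside it
   is deliberately skipped, per the comment about PARALLEL handling.  */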
static void
dump_hash_table (file, name, table, table_size, total_size)
     FILE *file;
     const char *name;
     struct expr **table;
     int table_size, total_size;
{
  int i;
  /* Flattened out table, so it's printed in proper order.  */
  struct expr **flat_table;
  unsigned int *hash_val;
  struct expr *expr;

  flat_table
    = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
  hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));

  for (i = 0; i < table_size; i++)
    for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	flat_table[expr->bitmap_index] = expr;
	hash_val[expr->bitmap_index] = i;
      }

  fprintf (file, "%s hash table (%d buckets, %d entries)\n",
	   name, table_size, total_size);

  for (i = 0; i < total_size; i++)
    if (flat_table[i] != 0)
      {
	expr = flat_table[i];
	fprintf (file, "Index %d (hash value %d)\n  ",
		 expr->bitmap_index, hash_val[i]);
	print_rtl (file, expr->expr);
	fprintf (file, "\n");
      }

  fprintf (file, "\n");

  free (flat_table);
  free (hash_val);
}
/* Record register first/last/block set information for REGNO in INSN.

   reg_first_set records the first place in the block where the register
   is set and is used to compute "anticipatability".

   reg_last_set records the last place in the block where the register
   is set and is used to compute "availability".

   reg_set_in_block records whether the register is set in the block
   and is used to compute "transparency".  */

static void
record_last_reg_set_info (insn, regno)
     rtx insn;
     int regno;
{
  if (reg_first_set[regno] == NEVER_SET)
    reg_first_set[regno] = INSN_CUID (insn);

  reg_last_set[regno] = INSN_CUID (insn);
  SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
}
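/* Illustrative note (added commentary): if a block contains

     insn 10: (set (reg 117) ...)
     insn 14: (set (reg 117) ...)

   then after scanning, reg_first_set[117] holds the CUID of insn 10 and
   reg_last_set[117] holds the CUID of insn 14; an expression using reg 117
   is anticipatable only before insn 10 and available only after insn 14.
   The insn and register numbers are hypothetical.  */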
/* Record memory first/last/block set information for INSN.  */

static void
record_last_mem_set_info (insn)
     rtx insn;
{
  if (mem_first_set == NEVER_SET)
    mem_first_set = INSN_CUID (insn);

  mem_last_set = INSN_CUID (insn);
  mem_set_in_block[BLOCK_NUM (insn)] = 1;
}
/* Called from compute_hash_table via note_stores to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */

static void
record_last_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx last_set_insn = (rtx) data;

  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (GET_CODE (dest) == REG)
    record_last_reg_set_info (last_set_insn, REGNO (dest));
  else if (GET_CODE (dest) == MEM
	   /* Ignore pushes, they clobber nothing.  */
	   && ! push_operand (dest, GET_MODE (dest)))
    record_last_mem_set_info (last_set_insn);
}
/* Top level function to create an expression or assignment hash table.

   Expression entries are placed in the hash table if
   - they are of the form (set (pseudo-reg) src),
   - src is something we want to perform GCSE on,
   - none of the operands are subsequently modified in the block

   Assignment entries are placed in the hash table if
   - they are of the form (set (pseudo-reg) src),
   - src is something we want to perform const/copy propagation on,
   - none of the operands or target are subsequently modified in the block

   Currently src must be a pseudo-reg or a const_int.

   F is the first insn.
   SET_P is non-zero for computing the assignment hash table.  */

static void
compute_hash_table (set_p)
     int set_p;
{
  int bb;

  /* While we compute the hash table we also compute a bit array of which
     registers are set in which blocks.
     We also compute which blocks set memory, in the absence of aliasing
     support [which is TODO].
     ??? This isn't needed during const/copy propagation, but it's cheap to
     compute.  Later.  */
  sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
  memset ((char *) mem_set_in_block, 0, n_basic_blocks);

  /* Some working arrays used to track first and last set in each block.  */
  /* ??? One could use alloca here, but at some size a threshold is crossed
     beyond which one should use malloc.  Are we at that threshold here?  */
  reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
  reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));

  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      rtx insn;
      unsigned int regno;
      int in_libcall_block;
      unsigned int i;

      /* First pass over the instructions records information used to
	 determine when registers and memory are first and last set.
	 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
	 could be moved to compute_sets since they currently don't change.  */

      for (i = 0; i < max_gcse_regno; i++)
	reg_first_set[i] = reg_last_set[i] = NEVER_SET;

      mem_first_set = NEVER_SET;
      mem_last_set = NEVER_SET;

      for (insn = BLOCK_HEAD (bb);
	   insn && insn != NEXT_INSN (BLOCK_END (bb));
	   insn = NEXT_INSN (insn))
	{
#ifdef NON_SAVING_SETJMP
	  if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
	      && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
	    {
	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
		record_last_reg_set_info (insn, regno);
	      continue;
	    }
#endif

	  if (! INSN_P (insn))
	    continue;

	  if (GET_CODE (insn) == CALL_INSN)
	    {
	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
		if ((call_used_regs[regno]
		     && regno != STACK_POINTER_REGNUM
#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
		     && regno != HARD_FRAME_POINTER_REGNUM
#endif
#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
		     && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
#endif
#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
		     && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
#endif
		     && regno != FRAME_POINTER_REGNUM)
		    || global_regs[regno])
		  record_last_reg_set_info (insn, regno);

	      if (! CONST_CALL_P (insn))
		record_last_mem_set_info (insn);
	    }

	  note_stores (PATTERN (insn), record_last_set_info, insn);
	}

      /* The next pass builds the hash table.  */

      for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
	   insn && insn != NEXT_INSN (BLOCK_END (bb));
	   insn = NEXT_INSN (insn))
	if (INSN_P (insn))
	  {
	    if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
	      in_libcall_block = 1;
	    else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
	      in_libcall_block = 0;

	    hash_scan_insn (insn, set_p, in_libcall_block);
	  }
    }

  /* These arrays are no longer needed.  */
  free (reg_first_set);
  free (reg_last_set);

  /* Catch bugs early.  */
  reg_first_set = reg_last_set = 0;
}
/* Allocate space for the set hash table.
   N_INSNS is the number of instructions in the function.
   It is used to determine the number of buckets to use.  */

static void
alloc_set_hash_table (n_insns)
     int n_insns;
{
  int n;

  set_hash_table_size = n_insns / 4;
  if (set_hash_table_size < 11)
    set_hash_table_size = 11;

  /* Attempt to maintain efficient use of hash table.
     Making it an odd number is simplest for now.
     ??? Later take some measurements.  */
  set_hash_table_size |= 1;
  n = set_hash_table_size * sizeof (struct expr *);
  set_hash_table = (struct expr **) gmalloc (n);
}
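/* Illustrative note (added commentary): the sizing above works out as
   follows.  For n_insns == 100, set_hash_table_size becomes 100/4 == 25,
   already >= 11 and odd, so 25 buckets are used.  For n_insns == 20,
   20/4 == 5 is bumped to the minimum of 11 (odd already).  The `|= 1'
   only matters for even quotients, e.g. 48/4 == 12 becomes 13.  */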
/* Free things allocated by alloc_set_hash_table.  */

static void
free_set_hash_table ()
{
  free (set_hash_table);
}
/* Compute the hash table for doing copy/const propagation.  */

static void
compute_set_hash_table ()
{
  /* Initialize count of number of entries in hash table.  */
  n_sets = 0;
  memset ((char *) set_hash_table, 0,
	  set_hash_table_size * sizeof (struct expr *));

  compute_hash_table (1);
}
/* Allocate space for the expression hash table.
   N_INSNS is the number of instructions in the function.
   It is used to determine the number of buckets to use.  */

static void
alloc_expr_hash_table (n_insns)
     unsigned int n_insns;
{
  int n;

  expr_hash_table_size = n_insns / 2;
  /* Make sure the amount is usable.  */
  if (expr_hash_table_size < 11)
    expr_hash_table_size = 11;

  /* Attempt to maintain efficient use of hash table.
     Making it an odd number is simplest for now.
     ??? Later take some measurements.  */
  expr_hash_table_size |= 1;
  n = expr_hash_table_size * sizeof (struct expr *);
  expr_hash_table = (struct expr **) gmalloc (n);
}
/* Free things allocated by alloc_expr_hash_table.  */

static void
free_expr_hash_table ()
{
  free (expr_hash_table);
}
/* Compute the hash table for doing GCSE.  */

static void
compute_expr_hash_table ()
{
  /* Initialize count of number of entries in hash table.  */
  n_exprs = 0;
  memset ((char *) expr_hash_table, 0,
	  expr_hash_table_size * sizeof (struct expr *));

  compute_hash_table (0);
}
/* Expression tracking support.  */

/* Lookup pattern PAT in the expression table.
   The result is a pointer to the table entry, or NULL if not found.  */

static struct expr *
lookup_expr (pat)
     rtx pat;
{
  int do_not_record_p;
  unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
				 expr_hash_table_size);
  struct expr *expr;

  if (do_not_record_p)
    return NULL;

  expr = expr_hash_table[hash];

  while (expr && ! expr_equiv_p (expr->expr, pat))
    expr = expr->next_same_hash;

  return expr;
}
/* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
   matches it, otherwise return the first entry for REGNO.  The result is a
   pointer to the table entry, or NULL if not found.  */

static struct expr *
lookup_set (regno, pat)
     unsigned int regno;
     rtx pat;
{
  unsigned int hash = hash_set (regno, set_hash_table_size);
  struct expr *expr;

  expr = set_hash_table[hash];

  if (pat)
    {
      while (expr && ! expr_equiv_p (expr->expr, pat))
	expr = expr->next_same_hash;
    }
  else
    {
      while (expr && REGNO (SET_DEST (expr->expr)) != regno)
	expr = expr->next_same_hash;
    }

  return expr;
}
/* Return the next entry for REGNO in list EXPR.  */

static struct expr *
next_set (regno, expr)
     unsigned int regno;
     struct expr *expr;
{
  do
    expr = expr->next_same_hash;
  while (expr && REGNO (SET_DEST (expr->expr)) != regno);

  return expr;
}
/* Reset tables used to keep track of what's still available [since the
   start of the block].  */

static void
reset_opr_set_tables ()
{
  /* Maintain a bitmap of which regs have been set since beginning of
     the block.  */
  sbitmap_zero (reg_set_bitmap);

  /* Also keep a record of the last instruction to modify memory.
     For now this is very trivial, we only record whether any memory
     location has been modified.  */
  mem_last_set = 0;
}
/* Return non-zero if the operands of X are not set before INSN in
   INSN's basic block.  */

static int
oprs_not_set_p (x, insn)
     rtx x, insn;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case PC:
    case CC0:
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    case MEM:
      if (mem_last_set != 0)
	return 0;
      else
	return oprs_not_set_p (XEXP (x, 0), insn);

    case REG:
      return ! TEST_BIT (reg_set_bitmap, REGNO (x));

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    return oprs_not_set_p (XEXP (x, i), insn);

	  if (! oprs_not_set_p (XEXP (x, i), insn))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
	    return 0;
    }

  return 1;
}
/* Mark things set by a CALL.  */

static void
mark_call (insn)
     rtx insn;
{
  if (! CONST_CALL_P (insn))
    mem_last_set = INSN_CUID (insn);
}
/* Mark things set by a SET.  */

static void
mark_set (pat, insn)
     rtx pat, insn;
{
  rtx dest = SET_DEST (pat);

  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == SIGN_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  if (GET_CODE (dest) == REG)
    SET_BIT (reg_set_bitmap, REGNO (dest));
  else if (GET_CODE (dest) == MEM)
    mem_last_set = INSN_CUID (insn);

  if (GET_CODE (SET_SRC (pat)) == CALL)
    mark_call (insn);
}
/* Record things set by a CLOBBER.  */

static void
mark_clobber (pat, insn)
     rtx pat, insn;
{
  rtx clob = XEXP (pat, 0);

  while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
    clob = XEXP (clob, 0);

  if (GET_CODE (clob) == REG)
    SET_BIT (reg_set_bitmap, REGNO (clob));
  else
    mem_last_set = INSN_CUID (insn);
}
/* Record things set by INSN.
   This data is used by oprs_not_set_p.  */

static void
mark_oprs_set (insn)
     rtx insn;
{
  rtx pat = PATTERN (insn);
  int i;

  if (GET_CODE (pat) == SET)
    mark_set (pat, insn);
  else if (GET_CODE (pat) == PARALLEL)
    for (i = 0; i < XVECLEN (pat, 0); i++)
      {
	rtx x = XVECEXP (pat, 0, i);

	if (GET_CODE (x) == SET)
	  mark_set (x, insn);
	else if (GET_CODE (x) == CLOBBER)
	  mark_clobber (x, insn);
	else if (GET_CODE (x) == CALL)
	  mark_call (insn);
      }

  else if (GET_CODE (pat) == CLOBBER)
    mark_clobber (pat, insn);
  else if (GET_CODE (pat) == CALL)
    mark_call (insn);
}
/* Classic GCSE reaching definition support.  */

/* Allocate reaching def variables.  */

static void
alloc_rd_mem (n_blocks, n_insns)
     int n_blocks, n_insns;
{
  rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
  sbitmap_vector_zero (rd_kill, n_basic_blocks);

  rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
  sbitmap_vector_zero (rd_gen, n_basic_blocks);

  reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
  sbitmap_vector_zero (reaching_defs, n_basic_blocks);

  rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
  sbitmap_vector_zero (rd_out, n_basic_blocks);
}
/* Free reaching def variables.  */

static void
free_rd_mem ()
{
  free (rd_kill);
  free (rd_gen);
  free (reaching_defs);
  free (rd_out);
}
/* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */

static void
handle_rd_kill_set (insn, regno, bb)
     rtx insn;
     int regno, bb;
{
  struct reg_set *this_reg;

  for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg->next)
    if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
      SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
}
/* Compute the set of kill's for reaching definitions.  */

static void
compute_kill_rd ()
{
  int bb, cuid;
  unsigned int regno;
  int i;

  /* For each block
       For each set bit in `gen' of the block (i.e each insn which
	   generates a definition in the block)
	 Call the reg set by the insn corresponding to that bit regx
	 Look at the linked list starting at reg_set_table[regx]
	 For each setting of regx in the linked list, which is not in
	     this block
	   Set the bit in `kill' corresponding to that insn.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    for (cuid = 0; cuid < max_cuid; cuid++)
      if (TEST_BIT (rd_gen[bb], cuid))
	{
	  rtx insn = CUID_INSN (cuid);
	  rtx pat = PATTERN (insn);

	  if (GET_CODE (insn) == CALL_INSN)
	    {
	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
		if ((call_used_regs[regno]
		     && regno != STACK_POINTER_REGNUM
#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
		     && regno != HARD_FRAME_POINTER_REGNUM
#endif
#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
		     && ! (regno == ARG_POINTER_REGNUM
			   && fixed_regs[regno])
#endif
#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
		     && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
#endif
		     && regno != FRAME_POINTER_REGNUM)
		    || global_regs[regno])
		  handle_rd_kill_set (insn, regno, bb);
	    }

	  if (GET_CODE (pat) == PARALLEL)
	    {
	      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
		{
		  enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));

		  if ((code == SET || code == CLOBBER)
		      && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
		    handle_rd_kill_set (insn,
					REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
					bb);
		}
	    }
	  else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
	    /* Each setting of this register outside of this block
	       must be marked in the set of kills in this block.  */
	    handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
	}
}
/* Compute the reaching definitions as in
   Compilers Principles, Techniques, and Tools.  Aho, Sethi, Ullman,
   Chapter 10.  It is the same algorithm as used for computing available
   expressions but applied to the gens and kills of reaching definitions.  */

static void
compute_rd ()
{
  int bb, changed, passes;

  for (bb = 0; bb < n_basic_blocks; bb++)
    sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);

  passes = 0;
  changed = 1;
  while (changed)
    {
      changed = 0;
      for (bb = 0; bb < n_basic_blocks; bb++)
	{
	  sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
	  changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
					    reaching_defs[bb], rd_kill[bb]);
	}

      passes++;
    }

  if (gcse_file)
    fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
}
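/* Illustrative sketch (added commentary, not part of the pass): the loop
   above is a round-robin solution of the forward dataflow equations

     reaching_defs[bb] = union of rd_out[p] over predecessors p
     rd_out[bb]        = rd_gen[bb] | (reaching_defs[bb] & ~rd_kill[bb])

   iterated until no rd_out changes.  A stand-alone version over
   single-word bitsets, with a hypothetical preds[][] adjacency table,
   could look like this:  */
#if 0
static void
compute_rd_sketch (int nblocks, unsigned gen[], unsigned kill[],
		   unsigned out[], unsigned reach[],
		   int npreds[], int preds[][4])
{
  int bb, i, changed = 1;

  /* Initialize OUT to GEN, as above.  */
  for (bb = 0; bb < nblocks; bb++)
    out[bb] = gen[bb];

  while (changed)
    {
      changed = 0;
      for (bb = 0; bb < nblocks; bb++)
	{
	  unsigned in = 0, new_out;

	  /* Union of predecessors' OUT sets.  */
	  for (i = 0; i < npreds[bb]; i++)
	    in |= out[preds[bb][i]];

	  reach[bb] = in;
	  new_out = gen[bb] | (in & ~kill[bb]);
	  changed |= (new_out != out[bb]);
	  out[bb] = new_out;
	}
    }
}
#endif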
/* Classic GCSE available expression support.  */

/* Allocate memory for available expression computation.  */

static void
alloc_avail_expr_mem (n_blocks, n_exprs)
     int n_blocks, n_exprs;
{
  ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
  sbitmap_vector_zero (ae_kill, n_basic_blocks);

  ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
  sbitmap_vector_zero (ae_gen, n_basic_blocks);

  ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
  sbitmap_vector_zero (ae_in, n_basic_blocks);

  ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
  sbitmap_vector_zero (ae_out, n_basic_blocks);
}
static void
free_avail_expr_mem ()
{
  free (ae_kill);
  free (ae_gen);
  free (ae_in);
  free (ae_out);
}
/* Compute the set of available expressions generated in each basic block.  */

static void
compute_ae_gen ()
{
  unsigned int i;
  struct expr *expr;
  struct occr *occr;

  /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
     This is all we have to do because an expression is not recorded if it
     is not available, and the only expressions we want to work with are the
     ones that are recorded.  */
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
      for (occr = expr->avail_occr; occr != 0; occr = occr->next)
	SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
}
/* Return non-zero if expression X is killed in BB.  */

static int
expr_killed_p (x, bb)
     rtx x;
     int bb;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      return TEST_BIT (reg_set_in_block[bb], REGNO (x));

    case MEM:
      if (mem_set_in_block[bb])
	return 1;
      else
	return expr_killed_p (XEXP (x, 0), bb);

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 0;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    return expr_killed_p (XEXP (x, i), bb);
	  else if (expr_killed_p (XEXP (x, i), bb))
	    return 1;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (expr_killed_p (XVECEXP (x, i, j), bb))
	    return 1;
    }

  return 0;
}
/* Compute the set of available expressions killed in each basic block.  */

static void
compute_ae_kill (ae_gen, ae_kill)
     sbitmap *ae_gen, *ae_kill;
{
  int bb;
  unsigned int i;
  struct expr *expr;

  for (bb = 0; bb < n_basic_blocks; bb++)
    for (i = 0; i < expr_hash_table_size; i++)
      for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
	{
	  /* Skip EXPR if generated in this block.  */
	  if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
	    continue;

	  if (expr_killed_p (expr->expr, bb))
	    SET_BIT (ae_kill[bb], expr->bitmap_index);
	}
}
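/* Illustrative note (added commentary): an expression such as
   (plus:SI (reg 118) (reg 119)) is killed in any block whose
   reg_set_in_block bitmap has bit 118 or 119 set, and a MEM expression is
   killed in any block with mem_set_in_block nonzero, unless the block also
   generates the expression (the ae_gen test above).  Register numbers are
   hypothetical.  */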
/* Actually perform the Classic GCSE optimizations.  */

/* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.

   CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
   as a positive reach.  We want to do this when there are two computations
   of the expression in the block.

   VISITED is a pointer to a working buffer for tracking which BB's have
   been visited.  It is NULL for the top-level call.

   We treat reaching expressions that go through blocks containing the same
   reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
   2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
   2 as not reaching.  The intent is to improve the probability of finding
   only one reaching expression and to reduce register lifetimes by picking
   the closest such expression.  */

static int
expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
     struct occr *occr;
     struct expr *expr;
     int bb;
     int check_self_loop;
     char *visited;
{
  edge pred;

  for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
    {
      int pred_bb = pred->src->index;

      if (visited[pred_bb])
	/* This predecessor has already been visited.  Nothing to do.  */
	;
      else if (pred_bb == bb)
	{
	  /* BB loops on itself.  */
	  if (check_self_loop
	      && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
	      && BLOCK_NUM (occr->insn) == pred_bb)
	    return 1;

	  visited[pred_bb] = 1;
	}

      /* Ignore this predecessor if it kills the expression.  */
      else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
	visited[pred_bb] = 1;

      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
	{
	  /* Is this the occurrence we're looking for?
	     Note that there's only one generating occurrence per block
	     so we just need to check the block number.  */
	  if (BLOCK_NUM (occr->insn) == pred_bb)
	    return 1;

	  visited[pred_bb] = 1;
	}

      /* Neither gen nor kill.  */
      else
	{
	  visited[pred_bb] = 1;
	  if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
					visited))
	    return 1;
	}
    }

  /* All paths have been checked.  */
  return 0;
}
/* This wrapper for expr_reaches_here_p_work() is to ensure that any
   memory allocated for that function is returned.  */

static int
expr_reaches_here_p (occr, expr, bb, check_self_loop)
     struct occr *occr;
     struct expr *expr;
     int bb;
     int check_self_loop;
{
  int rval;
  char *visited = (char *) xcalloc (n_basic_blocks, 1);

  rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);

  free (visited);
  return rval;
}
/* Return the instruction that computes EXPR that reaches INSN's basic block.
   If there is more than one such instruction, return NULL.

   Called only by handle_avail_expr.  */

static rtx
computing_insn (expr, insn)
     struct expr *expr;
     rtx insn;
{
  int bb = BLOCK_NUM (insn);

  if (expr->avail_occr->next == NULL)
    {
      if (BLOCK_NUM (expr->avail_occr->insn) == bb)
	/* The available expression is actually itself
	   (i.e. a loop in the flow graph) so do nothing.  */
	return NULL;

      /* (FIXME) Case that we found a pattern that was created by
	 a substitution that took place.  */
      return expr->avail_occr->insn;
    }
  else
    {
      /* Pattern is computed more than once.
	 Search backwards from this insn to see how many of these
	 computations actually reach this insn.  */
      struct occr *occr;
      rtx insn_computes_expr = NULL;

      for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	{
	  if (BLOCK_NUM (occr->insn) == bb)
	    {
	      /* The expression is generated in this block.
		 The only time we care about this is when the expression
		 is generated later in the block [and thus there's a loop].
		 We let the normal cse pass handle the other cases.  */
	      if (INSN_CUID (insn) < INSN_CUID (occr->insn)
		  && expr_reaches_here_p (occr, expr, bb, 1))
		{
		  if (insn_computes_expr != NULL)
		    return NULL;

		  insn_computes_expr = occr->insn;
		}
	    }
	  else if (expr_reaches_here_p (occr, expr, bb, 0))
	    {
	      if (insn_computes_expr != NULL)
		return NULL;

	      insn_computes_expr = occr->insn;
	    }
	}

      if (insn_computes_expr == NULL)
	abort ();

      return insn_computes_expr;
    }
}
/* Return non-zero if the definition in DEF_INSN can reach INSN.
   Only called by can_disregard_other_sets.  */

static int
def_reaches_here_p (insn, def_insn)
     rtx insn, def_insn;
{
  rtx reg;

  if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
    return 1;

  if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
    {
      if (INSN_CUID (def_insn) < INSN_CUID (insn))
	{
	  if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
	    return 1;
	  else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
	    reg = XEXP (PATTERN (def_insn), 0);
	  else if (GET_CODE (PATTERN (def_insn)) == SET)
	    reg = SET_DEST (PATTERN (def_insn));
	  else
	    return 1;

	  return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
	}
      else
	return 0;
    }

  return 0;
}
/* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
   value returned is the number of definitions that reach INSN.  Returning a
   value of zero means that [maybe] more than one definition reaches INSN and
   the caller can't perform whatever optimization it is trying.  i.e. it is
   always safe to return zero.  */

static int
can_disregard_other_sets (addr_this_reg, insn, for_combine)
     struct reg_set **addr_this_reg;
     rtx insn;
     int for_combine;
{
  int number_of_reaching_defs = 0;
  struct reg_set *this_reg;

  for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
    if (def_reaches_here_p (insn, this_reg->insn))
      {
	number_of_reaching_defs++;
	/* Ignore parallels for now.  */
	if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
	  return 0;

	if (!for_combine
	    && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
		|| ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
				  SET_SRC (PATTERN (insn)))))
	  /* A setting of the reg to a different value reaches INSN.  */
	  return 0;

	if (number_of_reaching_defs > 1)
	  {
	    /* If in this setting the value the register is being set to is
	       equal to the previous value the register was set to and this
	       setting reaches the insn we are trying to do the substitution
	       on then we are ok.  */
	    if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
	      return 0;
	    else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
				    SET_SRC (PATTERN (insn))))
	      return 0;
	  }

	*addr_this_reg = this_reg;
      }

  return number_of_reaching_defs;
}
/* Expression computed by insn is available and the substitution is legal,
   so try to perform the substitution.

   The result is non-zero if any changes were made.  */

static int
handle_avail_expr (insn, expr)
     rtx insn;
     struct expr *expr;
{
  rtx pat, insn_computes_expr;
  rtx to;
  struct reg_set *this_reg;
  int found_setting, use_src;
  int changed = 0;

  /* We only handle the case where one computation of the expression
     reaches this instruction.  */
  insn_computes_expr = computing_insn (expr, insn);
  if (insn_computes_expr == NULL)
    return 0;

  found_setting = 0;
  use_src = 0;

  /* At this point we know only one computation of EXPR outside of this
     block reaches this insn.  Now try to find a register that the
     expression is computed into.  */
  if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
    {
      /* This is the case when the available expression that reaches
	 here has already been handled as an available expression.  */
      unsigned int regnum_for_replacing
	= REGNO (SET_SRC (PATTERN (insn_computes_expr)));

      /* If the register was created by GCSE we can't use `reg_set_table',
	 however we know it's set only once.  */
      if (regnum_for_replacing >= max_gcse_regno
	  /* If the register the expression is computed into is set only once,
	     or only one set reaches this insn, we can use it.  */
	  || (((this_reg = reg_set_table[regnum_for_replacing]),
	       this_reg->next == NULL)
	      || can_disregard_other_sets (&this_reg, insn, 0)))
	{
	  use_src = 1;
	  found_setting = 1;
	}
    }

  if (!found_setting)
    {
      unsigned int regnum_for_replacing
	= REGNO (SET_DEST (PATTERN (insn_computes_expr)));

      /* This shouldn't happen.  */
      if (regnum_for_replacing >= max_gcse_regno)
	abort ();

      this_reg = reg_set_table[regnum_for_replacing];

      /* If the register the expression is computed into is set only once,
	 or only one set reaches this insn, use it.  */
      if (this_reg->next == NULL
	  || can_disregard_other_sets (&this_reg, insn, 0))
	found_setting = 1;
    }

  if (found_setting)
    {
      pat = PATTERN (insn);
      if (use_src)
	to = SET_SRC (PATTERN (insn_computes_expr));
      else
	to = SET_DEST (PATTERN (insn_computes_expr));
      changed = validate_change (insn, &SET_SRC (pat), to, 0);

      /* We should be able to ignore the return code from validate_change but
	 to play it safe we check.  */
      if (changed)
	{
	  gcse_subst_count++;
	  if (gcse_file != NULL)
	    {
	      fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
		       INSN_UID (insn));
	      fprintf (gcse_file, " reg %d %s insn %d\n",
		       REGNO (to), use_src ? "from" : "set in",
		       INSN_UID (insn_computes_expr));
	    }
	}
    }

  /* The register that the expr is computed into is set more than once.  */
  else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
    {
      /* Insert an insn after insnx that copies the reg set in insnx
	 into a new pseudo register call this new register REGN.
	 From insnb until end of basic block or until REGB is set
	 replace all uses of REGB with REGN.  */
      rtx new_insn;

      to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));

      /* Generate the new insn.  */
      /* ??? If the change fails, we return 0, even though we created
	 an insn.  I think this is ok.  */
      new_insn
	= emit_insn_after (gen_rtx_SET (VOIDmode, to,
					SET_DEST (PATTERN
						  (insn_computes_expr))),
			   insn_computes_expr);

      /* Keep block number table up to date.  */
      set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));

      /* Keep register set table up to date.  */
      record_one_set (REGNO (to), new_insn);

      gcse_create_count++;
      if (gcse_file != NULL)
	{
	  fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
		   INSN_UID (NEXT_INSN (insn_computes_expr)),
		   REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
	  fprintf (gcse_file, ", computed in insn %d,\n",
		   INSN_UID (insn_computes_expr));
	  fprintf (gcse_file, "      into newly allocated reg %d\n",
		   REGNO (to));
	}

      pat = PATTERN (insn);

      /* Do register replacement for INSN.  */
      changed = validate_change (insn, &SET_SRC (pat),
				 SET_DEST (PATTERN
					   (NEXT_INSN (insn_computes_expr))),
				 0);

      /* We should be able to ignore the return code from validate_change but
	 to play it safe we check.  */
      if (changed)
	{
	  gcse_subst_count++;
	  if (gcse_file != NULL)
	    {
	      fprintf (gcse_file,
		       "GCSE: Replacing the source in insn %d with reg %d ",
		       INSN_UID (insn),
		       REGNO (SET_DEST (PATTERN (NEXT_INSN
						 (insn_computes_expr)))));
	      fprintf (gcse_file, "set in insn %d\n",
		       INSN_UID (insn_computes_expr));
	    }
	}
    }

  return changed;
}
/* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
   the dataflow analysis has been done.

   The result is non-zero if a change was made.  */

static int
classic_gcse ()
{
  int bb, changed;
  rtx insn;

  /* Note we start at block 1.  */

  changed = 0;
  for (bb = 1; bb < n_basic_blocks; bb++)
    {
      /* Reset tables used to keep track of what's still valid [since the
	 start of the block].  */
      reset_opr_set_tables ();

      for (insn = BLOCK_HEAD (bb);
	   insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  /* Is insn of form (set (pseudo-reg) ...)?  */
	  if (GET_CODE (insn) == INSN
	      && GET_CODE (PATTERN (insn)) == SET
	      && GET_CODE (SET_DEST (PATTERN (insn))) == REG
	      && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
	    {
	      rtx pat = PATTERN (insn);
	      rtx src = SET_SRC (pat);
	      struct expr *expr;

	      if (want_to_gcse_p (src)
		  /* Is the expression recorded?  */
		  && ((expr = lookup_expr (src)) != NULL)
		  /* Is the expression available [at the start of the
		     block]?  */
		  && TEST_BIT (ae_in[bb], expr->bitmap_index)
		  /* Are the operands unchanged since the start of the
		     block?  */
		  && oprs_not_set_p (src, insn))
		changed |= handle_avail_expr (insn, expr);
	    }

	  /* Keep track of everything modified by this insn.  */
	  /* ??? Need to be careful w.r.t. mods done to INSN.  */
	  if (INSN_P (insn))
	    mark_oprs_set (insn);
	}
    }

  return changed;
}
/* Top level routine to perform one classic GCSE pass.

   Return non-zero if a change was made.  */

static int
one_classic_gcse_pass (pass)
     int pass;
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  alloc_expr_hash_table (max_cuid);
  alloc_rd_mem (n_basic_blocks, max_cuid);
  compute_expr_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Expression", expr_hash_table,
		     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      compute_kill_rd ();
      compute_rd ();
      alloc_avail_expr_mem (n_basic_blocks, n_exprs);
      compute_ae_gen ();
      compute_ae_kill (ae_gen, ae_kill);
      compute_available (ae_gen, ae_kill, ae_out, ae_in);
      changed = classic_gcse ();
      free_avail_expr_mem ();
    }

  free_rd_mem ();
  free_expr_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "\n");
      fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
	       current_function_name, pass, bytes_used, gcse_subst_count);
      fprintf (gcse_file, "%d insns created\n", gcse_create_count);
    }

  return changed;
}
/* Compute copy/constant propagation working variables.  */

/* Local properties of assignments.  */
static sbitmap *cprop_pavloc;
static sbitmap *cprop_absaltered;

/* Global properties of assignments (computed from the local properties).  */
static sbitmap *cprop_avin;
static sbitmap *cprop_avout;
/* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
   basic blocks.  N_SETS is the number of sets.  */

static void
alloc_cprop_mem (n_blocks, n_sets)
     int n_blocks, n_sets;
{
  cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
  cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);

  cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
  cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
}
/* Free vars used by copy/const propagation.  */

static void
free_cprop_mem ()
{
  free (cprop_pavloc);
  free (cprop_absaltered);
  free (cprop_avin);
  free (cprop_avout);
}
/* For each block, compute whether X is transparent.  X is either an
   expression or an assignment [though we don't care which, for this context
   an assignment is treated as an expression].  For each block where an
   element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
   bit in BMAP.  */

static void
compute_transp (x, indx, bmap, set_p)
     rtx x;
     int indx;
     sbitmap *bmap;
     int set_p;
{
  int bb, i, j;
  enum rtx_code code;
  struct reg_set *r;
  const char *fmt;

  /* repeat is used to turn tail-recursion into iteration since GCC
     can't do it when there's no return value.  */
 repeat:

  if (x == 0)
    return;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (set_p)
	{
	  if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	    {
	      for (bb = 0; bb < n_basic_blocks; bb++)
		if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
		  SET_BIT (bmap[bb], indx);
	    }
	  else
	    {
	      for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
		SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
	    }
	}
      else
	{
	  if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	    {
	      for (bb = 0; bb < n_basic_blocks; bb++)
		if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
		  RESET_BIT (bmap[bb], indx);
	    }
	  else
	    {
	      for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
		RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
	    }
	}

      return;

    case MEM:
      if (set_p)
	{
	  for (bb = 0; bb < n_basic_blocks; bb++)
	    if (mem_set_in_block[bb])
	      SET_BIT (bmap[bb], indx);
	}
      else
	{
	  for (bb = 0; bb < n_basic_blocks; bb++)
	    if (mem_set_in_block[bb])
	      RESET_BIT (bmap[bb], indx);
	}

      x = XEXP (x, 0);
      goto repeat;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, i);
	      goto repeat;
	    }

	  compute_transp (XEXP (x, i), indx, bmap, set_p);
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
    }
}
/* Top level routine to do the dataflow analysis needed by copy/const
   propagation.  */

static void
compute_cprop_data ()
{
  compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
  compute_available (cprop_pavloc, cprop_absaltered,
		     cprop_avout, cprop_avin);
}
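/* Illustrative note (added commentary): for copy/const propagation the
   two calls above solve, per block B,

     avout[B] = pavloc[B] | (avin[B] & ~absaltered[B])
     avin[B]  = intersection of avout[P] over predecessors P

   so cprop_avin[B] ends up holding exactly the assignments usable at the
   start of B, which is what find_avail_set tests below.  */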
/* Copy/constant propagation.  */

/* Maximum number of register uses in an insn that we handle.  */
#define MAX_USES 8

/* Table of uses found in an insn.
   Allocated statically to avoid alloc/free complexity and overhead.  */
static struct reg_use reg_use_table[MAX_USES];

/* Index into `reg_use_table' while building it.  */
static int reg_use_count;

/* Set up a list of register numbers used in INSN.  The found uses are stored
   in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
   and contains the number of uses in the table upon exit.

   ??? If a register appears multiple times we will record it multiple times.
   This doesn't hurt anything but it will slow things down.  */
static void
find_used_regs (x)
     rtx x;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  /* repeat is used to turn tail-recursion into iteration since GCC
     can't do it when there's no return value.  */
 repeat:

  if (x == 0)
    return;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (reg_use_count == MAX_USES)
	return;

      reg_use_table[reg_use_count].reg_rtx = x;
      reg_use_count++;
      return;

    case MEM:
      x = XEXP (x, 0);
      goto repeat;

    case PC:
    case CC0:
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case CLOBBER:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
    case ASM_INPUT: /*FIXME*/
      return;

    case SET:
      if (GET_CODE (SET_DEST (x)) == MEM)
	find_used_regs (SET_DEST (x));
      x = SET_SRC (x);
      goto repeat;

    default:
      break;
    }

  /* Recursively scan the operands of this expression.  */

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, 0);
	      goto repeat;
	    }

	  find_used_regs (XEXP (x, i));
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  find_used_regs (XVECEXP (x, i, j));
    }
}
/* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
   Returns non-zero if successful.  */

static int
try_replace_reg (from, to, insn)
     rtx from, to, insn;
{
  rtx note;
  rtx src;
  int success;
  rtx set;

  note = find_reg_note (insn, REG_EQUAL, NULL_RTX);

  if (!note)
    note = find_reg_note (insn, REG_EQUIV, NULL_RTX);

  /* If this fails we could try to simplify the result of the
     replacement and attempt to recognize the simplified insn.

     But we need a general simplify_rtx that doesn't have pass
     specific state variables.  I'm not aware of one at the moment.  */

  success = validate_replace_src (from, to, insn);
  set = single_set (insn);

  /* We've failed to do replacement.  Try to add REG_EQUAL note to not lose
     information.  */
  if (!success && !note)
    {
      if (!set)
	return 0;

      note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
						   copy_rtx (SET_SRC (set)),
						   REG_NOTES (insn));
    }

  /* Always do the replacement in REG_EQUAL and REG_EQUIV notes.  Also
     try to simplify them.  */
  if (note)
    {
      rtx simplified;

      if (!validate_replace_rtx_subexp (from, to, insn, &XEXP (note, 0)))
	abort ();

      src = XEXP (note, 0);

      /* Try to simplify resulting note.  */
      simplified = simplify_rtx (src);
      if (simplified)
	{
	  src = simplified;
	  XEXP (note, 0) = src;
	}

      /* REG_EQUAL may get simplified into register.
	 We don't allow that.  Remove that note.  This code ought
	 not to happen, because previous code ought to synthesize a
	 reg-reg move, but be on the safe side.  */
      else if (REG_P (src))
	remove_note (insn, note);
    }

  return success;
}
/* Find a set of REGNOs that are available on entry to INSN's block.  Returns
   NULL if no such set is found.  */

static struct expr *
find_avail_set (regno, insn)
     int regno;
     rtx insn;
{
  /* SET1 contains the last set found that can be returned to the caller for
     use in a substitution.  */
  struct expr *set1 = 0;

  /* Loops are not possible here.  To get a loop we would need two sets
     available at the start of the block containing INSN.  ie we would
     need two sets like this available at the start of the block:

       (set (reg X) (reg Y))
       (set (reg Y) (reg X))

     This can not happen since the set of (reg Y) would have killed the
     set of (reg X) making it unavailable at the start of this block.  */
  while (1)
    {
      rtx src;
      struct expr *set = lookup_set (regno, NULL_RTX);

      /* Find a set that is available at the start of the block
	 which contains INSN.  */
      while (set)
	{
	  if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
	    break;
	  set = next_set (regno, set);
	}

      /* If no available set was found we've reached the end of the
	 (possibly empty) copy chain.  */
      if (set == 0)
	break;

      if (GET_CODE (set->expr) != SET)
	abort ();

      src = SET_SRC (set->expr);

      /* We know the set is available.
	 Now check that SRC is ANTLOC (i.e. none of the source operands
	 have changed since the start of the block).

	 If the source operand changed, we may still use it for the next
	 iteration of this loop, but we may not use it for substitutions.  */

      if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
	set1 = set;

      /* If the source of the set is anything except a register, then
	 we have reached the end of the copy chain.  */
      if (GET_CODE (src) != REG)
	break;

      /* Follow the copy chain, ie start another iteration of the loop
	 and see if we have an available copy into SRC.  */
      regno = REGNO (src);
    }

  /* SET1 holds the last set that was available and anticipatable at
     INSN.  */
  return set1;
}
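/* Illustrative note (added commentary): given an available chain such as

     (set (reg 120) (const_int 42))
     (set (reg 121) (reg 120))

   a use of reg 121 makes the loop above first find the copy from reg 120,
   then follow REGNO (src) to the constant set, so SET1 ends up being the
   constant assignment when both sets are anticipatable.  Register numbers
   are hypothetical.  */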
/* Subroutine of cprop_insn that tries to propagate constants into
   JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
   that we can use for substitutions.
   REG_USED is the use we will try to replace, SRC is the constant we
   will try to substitute for it.
   Returns nonzero if a change was made.  */

static int
cprop_jump (insn, copy, reg_used, src)
     rtx insn, copy;
     struct reg_use *reg_used;
     rtx src;
{
  rtx set = PATTERN (copy);
  rtx temp;

  /* Replace the register with the appropriate constant.  */
  replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);

  temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
				     GET_MODE (SET_SRC (set)),
				     GET_MODE (XEXP (SET_SRC (set), 0)),
				     XEXP (SET_SRC (set), 0),
				     XEXP (SET_SRC (set), 1),
				     XEXP (SET_SRC (set), 2));

  /* If no simplification can be made, then try the next
     register.  */
  if (temp == 0)
    return 0;

  SET_SRC (set) = temp;

  /* That may have changed the structure of TEMP, so
     force it to be rerecognized if it has not turned
     into a nop or unconditional jump.  */

  INSN_CODE (copy) = -1;
  if ((SET_DEST (set) == pc_rtx
       && (SET_SRC (set) == pc_rtx
	   || GET_CODE (SET_SRC (set)) == LABEL_REF))
      || recog (PATTERN (copy), copy, NULL) >= 0)
    {
      /* This has either become an unconditional jump
	 or a nop-jump.  We'd like to delete nop jumps
	 here, but doing so confuses gcse.  So we just
	 make the replacement and let later passes
	 sort things out.  */
      PATTERN (insn) = set;
      INSN_CODE (insn) = -1;

      /* One less use of the label this insn used to jump to
	 if we turned this into a NOP jump.  */
      if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
	--LABEL_NUSES (JUMP_LABEL (insn));

      /* If this has turned into an unconditional jump,
	 then put a barrier after it so that the unreachable
	 code will be deleted.  */
      if (GET_CODE (SET_SRC (set)) == LABEL_REF)
	emit_barrier_after (insn);

      run_jump_opt_after_gcse = 1;

      const_prop_count++;
      if (gcse_file != NULL)
	{
	  fprintf (gcse_file,
		   "CONST-PROP: Replacing reg %d in insn %d with constant ",
		   REGNO (reg_used->reg_rtx), INSN_UID (insn));
	  print_rtl (gcse_file, src);
	  fprintf (gcse_file, "\n");
	}

      return 1;
    }

  return 0;
}
#ifdef HAVE_cc0

/* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
   for machines that have CC0.  INSN is a single set that stores into CC0;
   the insn following it is a conditional jump.  REG_USED is the use we will
   try to replace, SRC is the constant we will try to substitute for it.
   Returns nonzero if a change was made.  */

static int
cprop_cc0_jump (insn, reg_used, src)
     rtx insn;
     struct reg_use *reg_used;
     rtx src;
{
  rtx jump = NEXT_INSN (insn);
  rtx copy = copy_rtx (jump);
  rtx set = PATTERN (copy);

  /* We need to copy the source of the cc0 setter, as cprop_jump is going to
     substitute into it.  */
  replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
  if (! cprop_jump (jump, copy, reg_used, src))
    return 0;

  /* If we succeeded, delete the cc0 setter.  */
  PUT_CODE (insn, NOTE);
  NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (insn) = 0;
  return 1;
}
#endif
/* Perform constant and copy propagation on INSN.
   The result is non-zero if a change was made.  */

static int
cprop_insn (insn, alter_jumps)
     rtx insn;
     int alter_jumps;
{
  struct reg_use *reg_used;
  int changed = 0;
  rtx note;

  /* Only propagate into SETs.  Note that a conditional jump is a
     SET with pc_rtx as the destination.  */
  if ((GET_CODE (insn) != INSN
       && GET_CODE (insn) != JUMP_INSN)
      || GET_CODE (PATTERN (insn)) != SET)
    return 0;

  reg_use_count = 0;
  find_used_regs (PATTERN (insn));

  note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
  if (!note)
    note = find_reg_note (insn, REG_EQUAL, NULL_RTX);

  /* We may win even when propagating constants into notes.  */
  if (note)
    find_used_regs (XEXP (note, 0));

  for (reg_used = &reg_use_table[0]; reg_use_count > 0;
       reg_used++, reg_use_count--)
    {
      unsigned int regno = REGNO (reg_used->reg_rtx);
      rtx pat, src;
      struct expr *set;

      /* Ignore registers created by GCSE.
	 We do this because ...  */
      if (regno >= max_gcse_regno)
	continue;

      /* If the register has already been set in this block, there's
	 nothing we can do.  */
      if (! oprs_not_set_p (reg_used->reg_rtx, insn))
	continue;

      /* Find an assignment that sets reg_used and is available
	 at the start of the block.  */
      set = find_avail_set (regno, insn);
      if (! set)
	continue;

      pat = set->expr;
      /* ??? We might be able to handle PARALLELs.  Later.  */
      if (GET_CODE (pat) != SET)
	abort ();

      src = SET_SRC (pat);

      /* Constant propagation.  */
      if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
	  || GET_CODE (src) == SYMBOL_REF)
	{
	  /* Handle normal insns first.  */
	  if (GET_CODE (insn) == INSN
	      && try_replace_reg (reg_used->reg_rtx, src, insn))
	    {
	      changed = 1;
	      const_prop_count++;
	      if (gcse_file != NULL)
		{
		  fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
			   regno);
		  fprintf (gcse_file, "insn %d with constant ",
			   INSN_UID (insn));
		  print_rtl (gcse_file, src);
		  fprintf (gcse_file, "\n");
		}

	      /* The original insn setting reg_used may or may not now be
		 deletable.  We leave the deletion to flow.  */
	    }

	  /* Try to propagate a CONST_INT into a conditional jump.
	     We're pretty specific about what we will handle in this
	     code, we can extend this as necessary over time.

	     Right now the insn in question must look like
	     (set (pc) (if_then_else ...))  */
	  else if (alter_jumps
		   && GET_CODE (insn) == JUMP_INSN
		   && condjump_p (insn)
		   && ! simplejump_p (insn))
	    changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);

#ifdef HAVE_cc0
	  /* Similar code for machines that use a pair of CC0 setter and
	     conditional jump insn.  */
	  else if (alter_jumps
		   && GET_CODE (PATTERN (insn)) == SET
		   && SET_DEST (PATTERN (insn)) == cc0_rtx
		   && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
		   && condjump_p (NEXT_INSN (insn))
		   && ! simplejump_p (NEXT_INSN (insn)))
	    {
	      if (cprop_cc0_jump (insn, reg_used, src))
		{
		  changed = 1;
		  break;
		}
	    }
#endif
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) >= FIRST_PSEUDO_REGISTER
	       && REGNO (src) != regno)
	{
	  if (try_replace_reg (reg_used->reg_rtx, src, insn))
	    {
	      changed = 1;
	      copy_prop_count++;
	      if (gcse_file != NULL)
		{
		  fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
			   regno, INSN_UID (insn));
		  fprintf (gcse_file, " with reg %d\n", REGNO (src));
		}

	      /* The original insn setting reg_used may or may not now be
		 deletable.  We leave the deletion to flow.  */
	      /* FIXME: If it turns out that the insn isn't deletable,
		 then we may have unnecessarily extended register lifetimes
		 and made things worse.  */
	    }
	}
    }

  return changed;
}
/* Forward propagate copies.  This includes copies and constants.  Return
   non-zero if a change was made.  */

static int
cprop (alter_jumps)
     int alter_jumps;
{
  int bb, changed;
  rtx insn;

  /* Note we start at block 1.  */

  changed = 0;
  for (bb = 1; bb < n_basic_blocks; bb++)
    {
      /* Reset tables used to keep track of what's still valid [since the
	 start of the block].  */
      reset_opr_set_tables ();

      for (insn = BLOCK_HEAD (bb);
	   insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
	   insn = NEXT_INSN (insn))
	if (INSN_P (insn))
	  {
	    changed |= cprop_insn (insn, alter_jumps);

	    /* Keep track of everything modified by this insn.  */
	    /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
	       call mark_oprs_set if we turned the insn into a NOTE.  */
	    if (GET_CODE (insn) != NOTE)
	      mark_oprs_set (insn);
	  }
    }

  if (gcse_file != NULL)
    fprintf (gcse_file, "\n");

  return changed;
}
/* Perform one copy/constant propagation pass.
   F is the first insn in the function.
   PASS is the pass count.  */

static int
one_cprop_pass (pass, alter_jumps)
     int pass;
     int alter_jumps;
{
  int changed = 0;

  const_prop_count = 0;
  copy_prop_count = 0;

  alloc_set_hash_table (max_cuid);
  compute_set_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
		     n_sets);
  if (n_sets > 0)
    {
      alloc_cprop_mem (n_basic_blocks, n_sets);
      compute_cprop_data ();
      changed = cprop (alter_jumps);
      free_cprop_mem ();
    }

  free_set_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
	       current_function_name, pass, bytes_used);
      fprintf (gcse_file, "%d const props, %d copy props\n\n",
	       const_prop_count, copy_prop_count);
    }

  return changed;
}
/* Compute PRE+LCM working variables.  */

/* Local properties of expressions.  */
/* Nonzero for expressions that are transparent in the block.  */
static sbitmap *transp;

/* Nonzero for expressions that are transparent at the end of the block.
   This is only zero for expressions killed by an abnormal critical edge
   created by a call.  */
static sbitmap *transpout;

/* Nonzero for expressions that are computed (available) in the block.  */
static sbitmap *comp;

/* Nonzero for expressions that are locally anticipatable in the block.  */
static sbitmap *antloc;

/* Nonzero for expressions where this block is an optimal computation
   point.  */
static sbitmap *pre_optimal;

/* Nonzero for expressions which are redundant in a particular block.  */
static sbitmap *pre_redundant;

/* Nonzero for expressions which should be inserted on a specific edge.  */
static sbitmap *pre_insert_map;

/* Nonzero for expressions which should be deleted in a specific block.  */
static sbitmap *pre_delete_map;

/* Contains the edge_list returned by pre_edge_lcm.  */
static struct edge_list *edge_list;

/* Redundant insns.  */
static sbitmap pre_redundant_insns;
/* Allocate vars used for PRE analysis.  */

static void
alloc_pre_mem (n_blocks, n_exprs)
     int n_blocks, n_exprs;
{
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);

  pre_optimal = NULL;
  pre_redundant = NULL;
  pre_insert_map = NULL;
  pre_delete_map = NULL;
  ae_in = NULL;
  ae_out = NULL;
  ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);

  /* pre_insert and pre_delete are allocated later.  */
}
/* Free vars used for PRE analysis.  */

static void
free_pre_mem ()
{
  free (transp);
  free (comp);

  /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */

  if (pre_optimal)
    free (pre_optimal);
  if (pre_redundant)
    free (pre_redundant);
  if (pre_insert_map)
    free (pre_insert_map);
  if (pre_delete_map)
    free (pre_delete_map);
  if (ae_in)
    free (ae_in);
  if (ae_out)
    free (ae_out);

  transp = comp = NULL;
  pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
  ae_in = ae_out = NULL;
}
/* Top level routine to do the dataflow analysis needed by PRE.  */

static void
compute_pre_data ()
{
  sbitmap trapping_expr;
  int i;
  unsigned int ui;

  compute_local_properties (transp, comp, antloc, 0);
  sbitmap_vector_zero (ae_kill, n_basic_blocks);

  /* Collect expressions which might trap.  */
  trapping_expr = sbitmap_alloc (n_exprs);
  sbitmap_zero (trapping_expr);
  for (ui = 0; ui < expr_hash_table_size; ui++)
    {
      struct expr *e;
      for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
	if (may_trap_p (e->expr))
	  SET_BIT (trapping_expr, e->bitmap_index);
    }

  /* Compute ae_kill for each basic block using:

     ~(TRANSP | COMP)

     This is significantly faster than compute_ae_kill.  */

  for (i = 0; i < n_basic_blocks; i++)
    {
      edge e;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping expressions because we won't be able to properly
	 place the instruction on the edge.  So make them neither
	 anticipatable nor transparent.  This is fairly conservative.  */
      for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
	if (e->flags & EDGE_ABNORMAL)
	  {
	    sbitmap_difference (antloc[i], antloc[i], trapping_expr);
	    sbitmap_difference (transp[i], transp[i], trapping_expr);
	    break;
	  }

      sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
      sbitmap_not (ae_kill[i], ae_kill[i]);
    }

  edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
			    ae_kill, &pre_insert_map, &pre_delete_map);
  free (antloc);
  antloc = NULL;
  free (ae_kill);
  ae_kill = NULL;
  free (trapping_expr);
}
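/* Illustrative note (added commentary): with KILL computed as
   ~(TRANSP | COMP), an expression that is neither transparent in a block
   (one of its operands is modified there) nor computed by it is treated
   as killed; e.g. transp bit 0 and comp bit 0 give kill bit 1, while any
   block that recomputes the expression (comp bit 1) has kill bit 0
   regardless of transparency.  */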
/* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
   block BB.

   VISITED is a pointer to a working buffer for tracking which BB's have
   been visited.  It is NULL for the top-level call.

   We treat reaching expressions that go through blocks containing the same
   reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
   2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
   2 as not reaching.  The intent is to improve the probability of finding
   only one reaching expression and to reduce register lifetimes by picking
   the closest such expression.  */

static int
pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
     int occr_bb;
     struct expr *expr;
     int bb;
     char *visited;
{
  edge pred;

  for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
    {
      int pred_bb = pred->src->index;

      if (pred->src == ENTRY_BLOCK_PTR
	  /* Has this predecessor already been visited?  */
	  || visited[pred_bb])
	;/* Nothing to do.  */

      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
	{
	  /* Is this the occurrence we're looking for?
	     Note that there's only one generating occurrence per block
	     so we just need to check the block number.  */
	  if (occr_bb == pred_bb)
	    return 1;

	  visited[pred_bb] = 1;
	}
      /* Ignore this predecessor if it kills the expression.  */
      else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
	visited[pred_bb] = 1;

      /* Neither gen nor kill.  */
      else
	{
	  visited[pred_bb] = 1;
	  if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
	    return 1;
	}
    }

  /* All paths have been checked.  */
  return 0;
}
/* The wrapper for pre_expr_reaches_here_work that ensures that any
   memory allocated for that function is returned.  */

static int
pre_expr_reaches_here_p (occr_bb, expr, bb)
     int occr_bb;
     struct expr *expr;
     int bb;
{
  int rval;
  char *visited = (char *) xcalloc (n_basic_blocks, 1);

  rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);

  free (visited);
  return rval;
}
/* Given an expr, generate RTL which we can insert at the end of a BB,
   or on an edge.  Set the block number of any insns generated to
   the value of BB.  */

static rtx
process_insert_insn (expr)
     struct expr *expr;
{
  rtx reg = expr->reaching_reg;
  rtx pat, copied_expr;
  rtx first_new_insn;

  start_sequence ();
  copied_expr = copy_rtx (expr->expr);
  emit_move_insn (reg, copied_expr);
  first_new_insn = get_insns ();
  pat = gen_sequence ();
  end_sequence ();

  return pat;
}
/* Add EXPR to the end of basic block BB.

   This is used by both the PRE and code hoisting.

   For PRE, we want to verify that the expr is either transparent
   or locally anticipatable in the target block.  This check makes
   no sense for code hoisting.  */

static void
insert_insn_end_bb (expr, bb, pre)
     struct expr *expr;
     int bb;
     int pre;
{
  rtx insn = BLOCK_END (bb);
  rtx new_insn;
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  rtx pat;
  int i;

  pat = process_insert_insn (expr);

  /* If the last insn is a jump, insert EXPR in front [taking care to
     handle cc0, etc. properly].  */

  if (GET_CODE (insn) == JUMP_INSN)
    {
#ifdef HAVE_cc0
      rtx note;
#endif

      /* If this is a jump table, then we can't insert stuff here.  Since
	 we know the previous real insn must be the tablejump, we insert
	 the new instruction just before the tablejump.  */
      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
	insn = prev_real_insn (insn);

#ifdef HAVE_cc0
      /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
	 if cc0 isn't set.  */
      note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
      if (note)
	insn = XEXP (note, 0);
      else
	{
	  rtx maybe_cc0_setter = prev_nonnote_insn (insn);
	  if (maybe_cc0_setter
	      && INSN_P (maybe_cc0_setter)
	      && sets_cc0_p (PATTERN (maybe_cc0_setter)))
	    insn = maybe_cc0_setter;
	}
#endif
      /* FIXME: What if something in cc0/jump uses value set in new insn?  */
      new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
    }

  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
  else if (GET_CODE (insn) == CALL_INSN)
    {
      HARD_REG_SET parm_regs;
      int nparm_regs;
      rtx p;

      /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
	 we search backward and place the instructions before the first
	 parameter is loaded.  Do this for everyone for consistency and a
	 presumption that we'll get better code elsewhere as well.

	 It should always be the case that we can put these instructions
	 anywhere in the basic block when performing PRE optimizations.
	 Check this.  */

      if (pre
	  && !TEST_BIT (antloc[bb], expr->bitmap_index)
	  && !TEST_BIT (transp[bb], expr->bitmap_index))
	abort ();

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing.  Collect the set of all
	 parameter registers.  */
      CLEAR_HARD_REG_SET (parm_regs);
      nparm_regs = 0;
      for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
	if (GET_CODE (XEXP (p, 0)) == USE
	    && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
	  {
	    if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
	      abort ();

	    SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
	    nparm_regs++;
	  }

      /* Search backward for the first set of a register in this set.  */
      while (nparm_regs && BLOCK_HEAD (bb) != insn)
	{
	  insn = PREV_INSN (insn);
	  p = single_set (insn);
	  if (p && GET_CODE (SET_DEST (p)) == REG
	      && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
	      && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
	    {
	      CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
	      nparm_regs--;
	    }
	}

      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the insn in the wrong basic block.  In that case, put the insn
	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
      while (GET_CODE (insn) == CODE_LABEL
	     || NOTE_INSN_BASIC_BLOCK_P (insn))
	insn = NEXT_INSN (insn);

      new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
    }
  else
    {
      new_insn = emit_insn_after (pat, insn);
      BLOCK_END (bb) = new_insn;
    }

  /* Keep block number table up to date.
     Note, PAT could be a multiple insn sequence, we have to make
     sure that each insn in the sequence is handled.  */
  if (GET_CODE (pat) == SEQUENCE)
    {
      for (i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx insn = XVECEXP (pat, 0, i);

	  set_block_num (insn, bb);
	  if (INSN_P (insn))
	    add_label_notes (PATTERN (insn), new_insn);

	  note_stores (PATTERN (insn), record_set_info, insn);
	}
    }
  else
    {
      add_label_notes (SET_SRC (pat), new_insn);
      set_block_num (new_insn, bb);

      /* Keep register set table up to date.  */
      record_one_set (regno, new_insn);
    }

  gcse_create_count++;

  if (gcse_file)
    {
      fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
	       bb, INSN_UID (new_insn));
      fprintf (gcse_file, "copying expression %d to reg %d\n",
	       expr->bitmap_index, regno);
    }
}
/* Insert partially redundant expressions on edges in the CFG to make
   the expressions fully redundant.  */

static int
pre_edge_insert (edge_list, index_map)
     struct edge_list *edge_list;
     struct expr **index_map;
{
  int e, i, j, num_edges, set_size, did_insert = 0;
  sbitmap *inserted;

  /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
     if it reaches any of the deleted expressions.  */

  set_size = pre_insert_map[0]->size;
  num_edges = NUM_EDGES (edge_list);
  inserted = sbitmap_vector_alloc (num_edges, n_exprs);
  sbitmap_vector_zero (inserted, num_edges);

  for (e = 0; e < num_edges; e++)
    {
      int indx;
      basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
      int bb = pred->index;

      for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
	{
	  SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];

	  for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
	    if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
	      {
		struct expr *expr = index_map[j];
		struct occr *occr;

		/* Now look at each deleted occurrence of this expression.  */
		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
		  {
		    if (! occr->deleted_p)
		      continue;

		    /* Insert this expression on this edge if it would
		       reach the deleted occurrence in BB.  */
		    if (!TEST_BIT (inserted[e], j))
		      {
			rtx insn;
			edge eg = INDEX_EDGE (edge_list, e);

			/* We can't insert anything on an abnormal and
			   critical edge, so we insert the insn at the end of
			   the previous block.  There are several alternatives
			   detailed in Morgan's book, P277 (sec 10.5), for
			   handling this situation.  This one is easiest for
			   now.  */

			if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
			  insert_insn_end_bb (index_map[j], bb, 0);
			else
			  {
			    insn = process_insert_insn (index_map[j]);
			    insert_insn_on_edge (insn, eg);
			  }

			if (gcse_file)
			  {
			    fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
				     bb,
				     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
			    fprintf (gcse_file, "copy expression %d\n",
				     expr->bitmap_index);
			  }

			SET_BIT (inserted[e], j);
			did_insert = 1;
			gcse_create_count++;
		      }
		  }
	      }
	}
    }

  free (inserted);
  return did_insert;
}
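/* Illustrative sketch, not compiler code: the word-at-a-time set-bit walk
   used in pre_edge_insert above, written against plain `unsigned long'
   words instead of SBITMAP_ELT_TYPE/SBITMAP_ELT_BITS.  The function and
   callback names here are hypothetical.  The `word >>= 1' step lets the
   inner loop terminate as soon as no higher bits remain set in the
   current word.  */

static void
example_walk_set_bits (const unsigned long *elms, int set_size, int n_bits,
		       void (*callback) (int))
{
  const int bits_per_elt = (int) (sizeof (unsigned long) * 8);
  int i, j, indx;

  for (i = indx = 0; i < set_size; i++, indx += bits_per_elt)
    {
      unsigned long word = elms[i];

      /* Once WORD is zero there are no more set bits in this element.  */
      for (j = indx; word && j < n_bits; j++, word >>= 1)
	if ((word & 1) != 0)
	  (*callback) (j);
    }
}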
/* Copy the result of INSN to REG.  INDX is the expression number.  */

static void
pre_insert_copy_insn (expr, insn)
     struct expr *expr;
     rtx insn;
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
  rtx set = single_set (insn);
  rtx new_insn;
  int bb = BLOCK_NUM (insn);

  if (!set)
    abort ();

  new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
			      insn);

  /* Keep block number table up to date.  */
  set_block_num (new_insn, bb);

  /* Keep register set table up to date.  */
  record_one_set (regno, new_insn);
  if (insn == BLOCK_END (bb))
    BLOCK_END (bb) = new_insn;

  gcse_create_count++;

  if (gcse_file)
    fprintf (gcse_file,
	     "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
	     BLOCK_NUM (insn), INSN_UID (new_insn), indx,
	     INSN_UID (insn), regno);
}
/* Copy available expressions that reach the redundant expression
   to `reaching_reg'.  */

static void
pre_insert_copies ()
{
  unsigned int i;
  struct expr *expr;
  struct occr *occr;
  struct occr *avail;

  /* For each available expression in the table, copy the result to
     `reaching_reg' if the expression reaches a deleted one.

     ??? The current algorithm is rather brute force.
     Need to do some profiling.  */

  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
	   we don't want to insert a copy here because the expression may not
	   really be redundant.  So only insert an insn if the expression was
	   deleted.  This test also avoids further processing if the
	   expression wasn't deleted anywhere.  */
	if (expr->reaching_reg == NULL)
	  continue;

	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    if (! occr->deleted_p)
	      continue;

	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
	      {
		rtx insn = avail->insn;

		/* No need to handle this one if handled already.  */
		if (avail->copied_p)
		  continue;

		/* Don't handle this one if it's a redundant one.  */
		if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
		  continue;

		/* Or if the expression doesn't reach the deleted one.  */
		if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
					       BLOCK_NUM (occr->insn)))
		  continue;

		/* Copy the result of avail to reaching_reg.  */
		pre_insert_copy_insn (expr, insn);
		avail->copied_p = 1;
	      }
	  }
      }
}
/* Delete redundant computations.
   Deletion is done by changing the insn to copy the `reaching_reg' of
   the expression into the result of the SET.  It is left to later passes
   (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.

   Returns non-zero if a change is made.  */

static int
pre_delete ()
{
  unsigned int i;
  int changed;
  struct expr *expr;
  struct occr *occr;

  changed = 0;
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	int indx = expr->bitmap_index;

	/* We only need to search antic_occr since we require
	   ANTLOC != 0.  */

	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    rtx insn = occr->insn;
	    rtx set;
	    int bb = BLOCK_NUM (insn);

	    if (TEST_BIT (pre_delete_map[bb], indx))
	      {
		set = single_set (insn);
		if (! set)
		  abort ();

		/* Create a pseudo-reg to store the result of reaching
		   expressions into.  Get the mode for the new pseudo from
		   the mode of the original destination pseudo.  */
		if (expr->reaching_reg == NULL)
		  expr->reaching_reg
		    = gen_reg_rtx (GET_MODE (SET_DEST (set)));

		/* In theory this should never fail since we're creating
		   a reg->reg copy.

		   However, on the x86 some of the movXX patterns actually
		   contain clobbers of scratch regs.  This may cause the
		   insn created by validate_change to not match any pattern
		   and thus cause validate_change to fail.  */
		if (validate_change (insn, &SET_SRC (set),
				     expr->reaching_reg, 0))
		  {
		    occr->deleted_p = 1;
		    SET_BIT (pre_redundant_insns, INSN_CUID (insn));
		    changed = 1;
		    gcse_subst_count++;
		  }

		if (gcse_file)
		  {
		    fprintf (gcse_file,
			     "PRE: redundant insn %d (expression %d) in ",
			     INSN_UID (insn), indx);
		    fprintf (gcse_file, "bb %d, reaching reg is %d\n",
			     bb, REGNO (expr->reaching_reg));
		  }
	      }
	  }
      }

  return changed;
}
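/* Source-level illustration, not compiler code, of what pre_delete and the
   insertion routines above accomplish together; the function and variable
   names are hypothetical.  Given the partial redundancy:

       if (cond)
	 x = a + b;
       else
	 x = 0;
       y = a + b;     -- redundant only when COND was true

   pre_edge_insert places `tmp = a + b' on the `else' edge,
   pre_insert_copies copies the available occurrence into TMP, and
   pre_delete rewrites the redundant computation as the copy `y = tmp':  */

static int
example_after_pre (int cond, int a, int b)
{
  int x, y, tmp;

  if (cond)
    {
      x = a + b;
      tmp = x;		/* copy emitted by pre_insert_copy_insn  */
    }
  else
    {
      tmp = a + b;	/* insn inserted on the edge by pre_edge_insert  */
      x = 0;
    }

  y = tmp;		/* redundant computation replaced by pre_delete  */
  return x + y;
}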
/* Perform GCSE optimizations using PRE.
   This is called by one_pre_gcse_pass after all the dataflow analysis
   has been done.

   This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
   lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
   Compiler Design and Implementation.

   ??? A new pseudo reg is created to hold the reaching expression.  The nice
   thing about the classical approach is that it would try to use an existing
   reg.  If the register can't be adequately optimized [i.e. we introduce
   reload problems], one could add a pass here to propagate the new register
   through the block.

   ??? We don't handle single sets in PARALLELs because we're [currently] not
   able to copy the rest of the parallel when we insert copies to create full
   redundancies from partial redundancies.  However, there's no reason why we
   can't handle PARALLELs in the cases where there are no partial
   redundancies.  */

static int
pre_gcse ()
{
  unsigned int i;
  int did_insert, changed;
  struct expr **index_map;
  struct expr *expr;

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Reset bitmap used to track which insns are redundant.  */
  pre_redundant_insns = sbitmap_alloc (max_cuid);
  sbitmap_zero (pre_redundant_insns);

  /* Delete the redundant insns first so that
     - we know what register to use for the new insns and for the other
       ones with reaching expressions
     - we know which insns are redundant when we go to create copies  */

  changed = pre_delete ();

  did_insert = pre_edge_insert (edge_list, index_map);

  /* In other places with reaching expressions, copy the expression to the
     specially allocated pseudo-reg that reaches the redundant expr.  */
  pre_insert_copies ();
  if (did_insert)
    {
      commit_edge_insertions ();
      changed = 1;
    }

  free (index_map);
  free (pre_redundant_insns);
  return changed;
}
/* Top level routine to perform one PRE GCSE pass.

   Return non-zero if a change was made.  */

static int
one_pre_gcse_pass (pass)
     int pass;
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  alloc_expr_hash_table (max_cuid);
  add_noreturn_fake_exit_edges ();
  compute_expr_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Expression", expr_hash_table,
		     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      alloc_pre_mem (n_basic_blocks, n_exprs);
      compute_pre_data ();
      changed |= pre_gcse ();
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  remove_fake_edges ();
  free_expr_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
	       current_function_name, pass, bytes_used);
      fprintf (gcse_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }

  return changed;
}
/* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
   If notes are added to an insn which references a CODE_LABEL, the
   LABEL_NUSES count is incremented.  We have to add REG_LABEL notes,
   because the following loop optimization pass requires them.  */

/* ??? This is very similar to the loop.c add_label_notes function.  We
   could probably share code here.  */

/* ??? If there was a jump optimization pass after gcse and before loop,
   then we would not need to do this here, because jump would add the
   necessary REG_LABEL notes.  */

static void
add_label_notes (x, insn)
     rtx x;
     rtx insn;
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
	 avoid flow generating (slightly) worse code.

	 We no longer ignore such label references (see LABEL_REF handling in
	 mark_jump_label for additional information).  */

      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
					    REG_NOTES (insn));
      if (LABEL_P (XEXP (x, 0)))
	LABEL_NUSES (XEXP (x, 0))++;

      return;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	add_label_notes (XEXP (x, i), insn);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  add_label_notes (XVECEXP (x, i, j), insn);
    }
}
/* Compute transparent outgoing information for each block.

   An expression is transparent to an edge unless it is killed by
   the edge itself.  This can only happen with abnormal control flow,
   when the edge is traversed through a call.  This happens with
   non-local labels and exceptions.

   This would not be necessary if we split the edge.  While this is
   normally impossible for abnormal critical edges, with some effort
   it should be possible with exception handling, since we still have
   control over which handler should be invoked.  But due to increased
   EH table sizes, this may not be worthwhile.  */

static void
compute_transpout ()
{
  int bb;
  unsigned int i;
  struct expr *expr;

  sbitmap_vector_ones (transpout, n_basic_blocks);

  for (bb = 0; bb < n_basic_blocks; ++bb)
    {
      /* Note that flow inserted a nop at the end of basic blocks that
	 end in call instructions for reasons other than abnormal
	 control flow.  */
      if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
	continue;

      for (i = 0; i < expr_hash_table_size; i++)
	for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
	  if (GET_CODE (expr->expr) == MEM)
	    {
	      if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
		  && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
		continue;

	      /* ??? Optimally, we would use interprocedural alias
		 analysis to determine if this mem is actually killed
		 by this call.  */
	      RESET_BIT (transpout[bb], expr->bitmap_index);
	    }
    }
}
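/* Illustration, not compiler code; names are hypothetical.  The routine
   above conservatively clears transpout for every MEM expression (other
   than constant-pool loads) in a block ending in a call, since the call
   may store to the location or leave the block along an abnormal edge.
   At the source level:  */

static int example_global;
extern void example_call (void);

static int
example_mem_killed_by_call (void)
{
  int before = example_global;	/* the MEM is available here  */

  example_call ();		/* may store to example_global  */

  /* The second load below cannot be treated as redundant with BEFORE,
     which is exactly what clearing transpout for this block expresses.  */
  return before + example_global;
}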
/* Removal of useless null pointer checks */

/* Called via note_stores.  X is set by SETTER.  If X is a register we must
   invalidate nonnull_local and set nonnull_killed.  DATA is really a
   `null_pointer_info *'.

   We ignore hard registers.  */

static void
invalidate_nonnull_info (x, setter, data)
     rtx x;
     rtx setter ATTRIBUTE_UNUSED;
     void *data;
{
  unsigned int regno;
  struct null_pointer_info *npi = (struct null_pointer_info *) data;

  while (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  /* Ignore anything that is not a register or is a hard register.  */
  if (GET_CODE (x) != REG
      || REGNO (x) < npi->min_reg
      || REGNO (x) >= npi->max_reg)
    return;

  regno = REGNO (x) - npi->min_reg;

  RESET_BIT (npi->nonnull_local[npi->current_block], regno);
  SET_BIT (npi->nonnull_killed[npi->current_block], regno);
}
/* Do null-pointer check elimination for the registers indicated in
   NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
   they are not our responsibility to free.  */

static void
delete_null_pointer_checks_1 (delete_list, block_reg, nonnull_avin,
			      nonnull_avout, npi)
     varray_type *delete_list;
     unsigned int *block_reg;
     sbitmap *nonnull_avin;
     sbitmap *nonnull_avout;
     struct null_pointer_info *npi;
{
  int bb;
  int current_block;
  sbitmap *nonnull_local = npi->nonnull_local;
  sbitmap *nonnull_killed = npi->nonnull_killed;

  /* Compute local properties, nonnull and killed.  A register will have
     the nonnull property if at the end of the current block its value is
     known to be nonnull.  The killed property indicates that somewhere in
     the block any information we had about the register is killed.

     Note that a register can have both properties in a single block.  That
     indicates that it's killed, then later in the block a new value is
     computed.  */
  sbitmap_vector_zero (nonnull_local, n_basic_blocks);
  sbitmap_vector_zero (nonnull_killed, n_basic_blocks);

  for (current_block = 0; current_block < n_basic_blocks; current_block++)
    {
      rtx insn, stop_insn;

      /* Set the current block for invalidate_nonnull_info.  */
      npi->current_block = current_block;

      /* Scan each insn in the basic block looking for memory references and
	 null pointer checks.  */
      stop_insn = NEXT_INSN (BLOCK_END (current_block));
      for (insn = BLOCK_HEAD (current_block);
	   insn != stop_insn;
	   insn = NEXT_INSN (insn))
	{
	  rtx set;
	  rtx reg;

	  /* Ignore anything that is not a normal insn.  */
	  if (! INSN_P (insn))
	    continue;

	  /* Basically ignore anything that is not a simple SET.  We do have
	     to make sure to invalidate nonnull_local and set nonnull_killed
	     for such insns though.  */
	  set = single_set (insn);
	  if (!set)
	    {
	      note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
	      continue;
	    }

	  /* See if we've got a usable memory load.  We handle it first
	     in case it uses its address register as a dest (which kills
	     the nonnull property).  */
	  if (GET_CODE (SET_SRC (set)) == MEM
	      && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
	      && REGNO (reg) >= npi->min_reg
	      && REGNO (reg) < npi->max_reg)
	    SET_BIT (nonnull_local[current_block],
		     REGNO (reg) - npi->min_reg);

	  /* Now invalidate stuff clobbered by this insn.  */
	  note_stores (PATTERN (insn), invalidate_nonnull_info, npi);

	  /* And handle stores, we do these last since any sets in INSN can
	     not kill the nonnull property if it is derived from a MEM
	     appearing in a SET_DEST.  */
	  if (GET_CODE (SET_DEST (set)) == MEM
	      && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
	      && REGNO (reg) >= npi->min_reg
	      && REGNO (reg) < npi->max_reg)
	    SET_BIT (nonnull_local[current_block],
		     REGNO (reg) - npi->min_reg);
	}
    }

  /* Now compute global properties based on the local properties.  This
     is a classic global availability algorithm.  */
  compute_available (nonnull_local, nonnull_killed,
		     nonnull_avout, nonnull_avin);

  /* Now look at each bb and see if it ends with a compare of a value
     against zero.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      rtx last_insn = BLOCK_END (bb);
      rtx condition, earliest;
      int compare_and_branch;

      /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
	 since BLOCK_REG[BB] is zero if this block did not end with a
	 comparison against zero, this condition works.  */
      if (block_reg[bb] < npi->min_reg
	  || block_reg[bb] >= npi->max_reg)
	continue;

      /* LAST_INSN is a conditional jump.  Get its condition.  */
      condition = get_condition (last_insn, &earliest);

      /* If we can't determine the condition then skip.  */
      if (! condition)
	continue;

      /* Is the register known to have a nonzero value?  */
      if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
	continue;

      /* Try to compute whether the compare/branch at the loop end is one or
	 two instructions.  */
      if (earliest == last_insn)
	compare_and_branch = 1;
      else if (earliest == prev_nonnote_insn (last_insn))
	compare_and_branch = 2;
      else
	continue;

      /* We know the register in this comparison is nonnull at exit from
	 this block.  We can optimize this comparison.  */
      if (GET_CODE (condition) == NE)
	{
	  rtx new_jump;

	  new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
					    last_insn);
	  JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
	  LABEL_NUSES (JUMP_LABEL (new_jump))++;
	  emit_barrier_after (new_jump);
	}

      if (!*delete_list)
	VARRAY_RTX_INIT (*delete_list, 10, "delete_list");

      VARRAY_PUSH_RTX (*delete_list, last_insn);
      if (compare_and_branch == 2)
	VARRAY_PUSH_RTX (*delete_list, earliest);

      /* Don't check this block again.  (Note that BLOCK_END is
	 invalid here; we deleted the last instruction in the
	 block.)  */
      block_reg[bb] = 0;
    }
}
/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
   at compile time.

   This is conceptually similar to global constant/copy propagation and
   classic global CSE (it even uses the same dataflow equations as cprop).

   If a register is used as memory address with the form (mem (reg)), then we
   know that REG can not be zero at that point in the program.  Any instruction
   which sets REG "kills" this property.

   So, if every path leading to a conditional branch has an available memory
   reference of that form, then we know the register can not have the value
   zero at the conditional branch.

   So we merely need to compute the local properties and propagate that data
   around the cfg, then optimize where possible.

   We run this pass two times.  Once before CSE, then again after CSE.  This
   has proven to be the most profitable approach.  It is rare for new
   optimization opportunities of this nature to appear after the first CSE
   pass.

   This could probably be integrated with global cprop with a little work.  */

void
delete_null_pointer_checks (f)
     rtx f ATTRIBUTE_UNUSED;
{
  sbitmap *nonnull_avin, *nonnull_avout;
  unsigned int *block_reg;
  varray_type delete_list = NULL;
  int bb;
  int reg;
  int regs_per_pass;
  int max_reg;
  unsigned int i;
  struct null_pointer_info npi;

  /* If we have only a single block, then there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    return;

  /* We need four bitmaps, each with a bit for each register in each
     basic block.  */
  max_reg = max_reg_num ();
  regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);

  /* Allocate bitmaps to hold local and global properties.  */
  npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);

  /* Go through the basic blocks, seeing whether or not each block
     ends with a conditional branch whose condition is a comparison
     against zero.  Record the register compared in BLOCK_REG.  */
  block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      rtx last_insn = BLOCK_END (bb);
      rtx condition, earliest, reg;

      /* We only want conditional branches.  */
      if (GET_CODE (last_insn) != JUMP_INSN
	  || !any_condjump_p (last_insn)
	  || !onlyjump_p (last_insn))
	continue;

      /* LAST_INSN is a conditional jump.  Get its condition.  */
      condition = get_condition (last_insn, &earliest);

      /* If we were unable to get the condition, or it is not an equality
	 comparison against zero then there's nothing we can do.  */
      if (!condition
	  || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
	  || GET_CODE (XEXP (condition, 1)) != CONST_INT
	  || (XEXP (condition, 1)
	      != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
	continue;

      /* We must be checking a register against zero.  */
      reg = XEXP (condition, 0);
      if (GET_CODE (reg) != REG)
	continue;

      block_reg[bb] = REGNO (reg);
    }

  /* Go through the algorithm for each block of registers.  */
  for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
    {
      npi.min_reg = reg;
      npi.max_reg = MIN (reg + regs_per_pass, max_reg);
      delete_null_pointer_checks_1 (&delete_list, block_reg, nonnull_avin,
				    nonnull_avout, &npi);
    }

  /* Now delete the instructions all at once.  This breaks the CFG.  */
  if (delete_list)
    {
      for (i = 0; i < VARRAY_ACTIVE_SIZE (delete_list); i++)
	delete_insn (VARRAY_RTX (delete_list, i));
      VARRAY_FREE (delete_list);
    }

  /* Free the table of registers compared at the end of every block.  */
  free (block_reg);

  /* Free bitmaps.  */
  free (npi.nonnull_local);
  free (npi.nonnull_killed);
  free (nonnull_avin);
  free (nonnull_avout);
}
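/* Source-level illustration, not compiler code, of the transformation
   performed by the pass above; names are hypothetical.  The load through
   P makes (mem (reg P)) available, so P is known nonnull at the
   comparison: an EQ test like this one is deleted outright, and an NE
   test is replaced by an unconditional jump.  */

static int
example_deref_then_check (int *p)
{
  int x = *p;		/* (mem (reg)) => P is nonnull from here on  */

  if (p == 0)		/* always false; removed by this pass  */
    return -1;

  return x;
}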
/* Code Hoisting variables and subroutines.  */

/* Very busy expressions.  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;

/* Hoistable expressions.  */
static sbitmap *hoist_exprs;

/* Dominator bitmaps.  */
static sbitmap *dominators;

/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging, doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */

/* Allocate vars used for code hoisting analysis.  */

static void
alloc_code_hoist_mem (n_blocks, n_exprs)
     int n_blocks, n_exprs;
{
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);

  hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
  transpout = sbitmap_vector_alloc (n_blocks, n_exprs);

  dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
}
/* Free vars used for code hoisting analysis.  */

static void
free_code_hoist_mem ()
{
  free (antloc);
  free (transp);
  free (comp);

  free (hoist_vbein);
  free (hoist_vbeout);
  free (hoist_exprs);
  free (transpout);

  free (dominators);
}
/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.  */

static void
compute_code_hoist_vbeinout ()
{
  int bb, changed, passes;

  sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
  sbitmap_vector_zero (hoist_vbein, n_basic_blocks);

  passes = 0;
  changed = 1;

  while (changed)
    {
      changed = 0;

      /* We scan the blocks in the reverse order to speed up
	 the convergence.  */
      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	{
	  changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
					   hoist_vbeout[bb], transp[bb]);
	  if (bb != n_basic_blocks - 1)
	    sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
	}

      passes++;
    }

  if (gcse_file)
    fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
}
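/* Illustrative sketch, not compiler code: one relaxation step of the
   backward dataflow equations solved above, using a plain `unsigned' bit
   mask for a single block instead of sbitmaps; the function name is
   hypothetical.  The equations are

     vbein(bb)  = antloc(bb) | (vbeout(bb) & transp(bb))
     vbeout(bb) = intersection of vbein over all successors of bb

   and, like sbitmap_a_or_b_and_c, the step reports whether VBEIN
   changed so the caller can iterate to a fixed point.  */

static int
example_vbein_step (unsigned *vbein, unsigned antloc_bb,
		    unsigned vbeout_bb, unsigned transp_bb)
{
  unsigned old = *vbein;

  *vbein = antloc_bb | (vbeout_bb & transp_bb);
  return *vbein != old;
}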
/* Top level routine to do the dataflow analysis needed by code hoisting.  */

static void
compute_code_hoist_data ()
{
  compute_local_properties (transp, comp, antloc, 0);
  compute_transpout ();
  compute_code_hoist_vbeinout ();
  calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
  if (gcse_file)
    fprintf (gcse_file, "\n");
}
/* Determine if the expression identified by EXPR_INDEX would
   reach BB unimpaired if it was placed at the end of EXPR_BB.

   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.  */

static int
hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
     int expr_bb;
     int expr_index;
     int bb;
     char *visited;
{
  edge pred;
  int visited_allocated_locally = 0;

  if (visited == NULL)
    {
      visited_allocated_locally = 1;
      visited = xcalloc (n_basic_blocks, 1);
    }

  for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
    {
      int pred_bb = pred->src->index;

      if (pred->src == ENTRY_BLOCK_PTR)
	break;
      else if (visited[pred_bb])
	continue;

      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb], expr_index))
	break;
      else if (! TEST_BIT (transp[pred_bb], expr_index))
	break;

      /* Not killed.  */
      else
	{
	  visited[pred_bb] = 1;
	  if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
					   pred_bb, visited))
	    break;
	}
    }

  if (visited_allocated_locally)
    free (visited);

  return (pred == NULL);
}
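/* Illustration, not compiler code, of why "unimpaired" must mean computed
   or transparent on *every* path; names are hypothetical.  STEP * 2 below
   is computed on every iteration, but STEP is redefined between
   computations, so the expression is not transparent through the loop
   body and must not be hoisted above the loop.  */

static int
example_not_loop_invariant (int n, int step)
{
  int i, sum = 0;

  for (i = 0; i < n; i++)
    {
      step = step + 1;	/* kills transparency of STEP * 2  */
      sum += step * 2;	/* so STEP * 2 cannot be hoisted out  */
    }

  return sum;
}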
/* Actually perform code hoisting.  */

static void
hoist_code ()
{
  int bb, dominated;
  unsigned int i;
  struct expr **index_map;
  struct expr *expr;

  sbitmap_vector_zero (hoist_exprs, n_basic_blocks);

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Walk over each basic block looking for potentially hoistable
     expressions, nothing gets hoisted from the entry block.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      int found = 0;
      int insn_inserted_p;

      /* Examine each expression that is very busy at the exit of this
	 block.  These are the potentially hoistable expressions.  */
      for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
	{
	  int hoistable = 0;

	  if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
	    {
	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      for (dominated = 0; dominated < n_basic_blocks; dominated++)
		{
		  /* Ignore self dominance.  */
		  if (bb == dominated
		      || ! TEST_BIT (dominators[dominated], bb))
		    continue;

		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (!TEST_BIT (antloc[dominated], i))
		    continue;

		  /* Note if the expression would reach the dominated block
		     unimpaired if it was placed at the end of BB.

		     Keep track of how many times this expression is hoistable
		     from a dominated block into BB.  */
		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
		    hoistable++;
		}

	      /* If we found more than one hoistable occurrence of this
		 expression, then note it in the bitmap of expressions to
		 hoist.  It makes no sense to hoist things which are computed
		 in only one BB, and doing so tends to pessimize register
		 allocation.  One could increase this value to try harder
		 to avoid any possible code expansion due to register
		 allocation issues; however experiments have shown that
		 the vast majority of hoistable expressions are only movable
		 from two successors, so raising this threshold is likely
		 to nullify any benefit we get from code hoisting.  */
	      if (hoistable > 1)
		{
		  SET_BIT (hoist_exprs[bb], i);
		  found = 1;
		}
	    }
	}

      /* If we found nothing to hoist, then quit now.  */
      if (! found)
	continue;

      /* Loop over all the hoistable expressions.  */
      for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
	{
	  /* We want to insert the expression into BB only once, so
	     note when we've inserted it.  */
	  insn_inserted_p = 0;

	  /* These tests should be the same as the tests above.  */
	  if (TEST_BIT (hoist_vbeout[bb], i))
	    {
	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      for (dominated = 0; dominated < n_basic_blocks; dominated++)
		{
		  /* Ignore self dominance.  */
		  if (bb == dominated
		      || ! TEST_BIT (dominators[dominated], bb))
		    continue;

		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (!TEST_BIT (antloc[dominated], i))
		    continue;

		  /* The expression is computed in the dominated block and
		     it would be safe to compute it at the start of the
		     dominated block.  Now we have to determine if the
		     expression would reach the dominated block if it was
		     placed at the end of BB.  */
		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
		    {
		      struct expr *expr = index_map[i];
		      struct occr *occr = expr->antic_occr;
		      rtx insn;
		      rtx set;

		      /* Find the right occurrence of this expression.  */
		      while (occr && BLOCK_NUM (occr->insn) != dominated)
			occr = occr->next;

		      /* Should never happen.  */
		      if (!occr)
			abort ();

		      insn = occr->insn;

		      set = single_set (insn);
		      if (! set)
			abort ();

		      /* Create a pseudo-reg to store the result of reaching
			 expressions into.  Get the mode for the new pseudo
			 from the mode of the original destination pseudo.  */
		      if (expr->reaching_reg == NULL)
			expr->reaching_reg
			  = gen_reg_rtx (GET_MODE (SET_DEST (set)));

		      /* In theory this should never fail since we're creating
			 a reg->reg copy.

			 However, on the x86 some of the movXX patterns
			 actually contain clobbers of scratch regs.  This may
			 cause the insn created by validate_change to not
			 match any pattern and thus cause validate_change to
			 fail.  */
		      if (validate_change (insn, &SET_SRC (set),
					   expr->reaching_reg, 0))
			{
			  occr->deleted_p = 1;
			  if (!insn_inserted_p)
			    {
			      insert_insn_end_bb (index_map[i], bb, 0);
			      insn_inserted_p = 1;
			    }
			}
		    }
		}
	    }
	}
    }

  free (index_map);
}
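/* Source-level illustration, not compiler code, of the effect of
   hoist_code; names are hypothetical.  A * B is very busy at the end of
   the block containing the branch (both successors compute it), so a
   single computation is inserted there and each dominated occurrence
   becomes a use of the new pseudo.  */

static int
example_after_hoisting (int cond, int a, int b)
{
  int tmp = a * b;	/* inserted at the end of the dominating block  */

  if (cond)
    return tmp + 1;	/* was a * b + 1  */
  else
    return tmp - 1;	/* was a * b - 1  */
}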
/* Top level routine to perform one code hoisting (aka unification) pass.

   Return non-zero if a change was made.  */

static int
one_code_hoisting_pass ()
{
  int changed = 0;

  alloc_expr_hash_table (max_cuid);
  compute_expr_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
		     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      alloc_code_hoist_mem (n_basic_blocks, n_exprs);
      compute_code_hoist_data ();
      hoist_code ();
      free_code_hoist_mem ();
    }

  free_expr_hash_table ();

  return changed;
}