1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - dead store elimination
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
33 recomputing it for each pass
37 /* References searched while implementing this.
39 Compilers Principles, Techniques and Tools
43 Global Optimization by Suppression of Partial Redundancies
45 Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
47 A Portable Machine-Independent Global Optimizer - Design and Measurements
49 Stanford Ph.D. thesis, Dec. 1983
51 A Fast Algorithm for Code Movement Optimization
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
60 Practical Adaptation of the Global Optimization
61 Algorithm of Morel and Renvoise
63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
65 Efficiently Computing Static Single Assignment Form and the Control
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
71 J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
75 Time for Reducible Flow Control
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
109 Global code motion / global value numbering
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
113 Value Driven Redundancy Elimination
115 Rice University Ph.D. thesis, Apr. 1996
119 Massively Scalar Compiler Project, Rice University, Sep. 1996
121 High Performance Compilers for Parallel Computing
125 Advanced Compiler Design and Implementation
127 Morgan Kaufmann, 1997
129 Building an Optimizing Compiler
133 People wishing to speed up the code here should read:
134 Elimination Algorithms for Data Flow Analysis
135 B.G. Ryder, M.C. Paull
136 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
138 How to Analyze Large Programs Efficiently and Informatively
139 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
140 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
142 People wishing to do something different can find various possibilities
143 in the above papers and elsewhere.
153 #include "hard-reg-set.h"
156 #include "insn-config.h"
158 #include "basic-block.h"
160 #include "function.h"
/* Route obstack chunk allocation through gmalloc, our byte-counting
   cover for xmalloc, so obstack memory shows up in the usage stats.  */
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free
168 /* Maximum number of passes to perform. */
/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1
184 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
185 are a superset of those done by GCSE.
187 We perform the following steps:
189 1) Compute basic block information.
191 2) Compute table of places where registers are set.
193 3) Perform copy/constant propagation.
195 4) Perform global cse.
197 5) Perform another pass of copy/constant propagation.
199 Two passes of copy/constant propagation are done because the first one
200 enables more GCSE and the second one helps to clean up the copies that
201 GCSE creates. This is needed more for PRE than for Classic because Classic
202 GCSE will try to use an existing register containing the common
203 subexpression rather than create a new one. This is harder to do for PRE
204 because of the code motion (which Classic GCSE doesn't do).
206 Expressions we are interested in GCSE-ing are of the form
207 (set (pseudo-reg) (expression)).
208 Function want_to_gcse_p says what these are.
210 PRE handles moving invariant expressions out of loops (by treating them as
211 partially redundant).
213 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
214 assignment) based GVN (global value numbering). L. T. Simpson's paper
215 (Rice University) on value numbering is a useful reference for this.
217 **********************
219 We used to support multiple passes but there are diminishing returns in
220 doing so. The first pass usually makes 90% of the changes that are doable.
221 A second pass can make a few more changes made possible by the first pass.
222 Experiments show any further passes don't make enough changes to justify
225 A study of spec92 using an unlimited number of passes:
226 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
227 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
228 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
230 It was found doing copy propagation between each pass enables further
233 PRE is quite expensive in complicated functions because the DFA can take
234 a while to converge. Hence we only perform one pass. Macro MAX_PASSES can
235 be modified if one wants to experiment.
237 **********************
239 The steps for PRE are:
241 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
243 2) Perform the data flow analysis for PRE.
245 3) Delete the redundant instructions
247 4) Insert the required copies [if any] that make the partially
248 redundant instructions fully redundant.
250 5) For other reaching expressions, insert an instruction to copy the value
251 to a newly created pseudo that will reach the redundant instruction.
253 The deletion is done first so that when we do insertions we
254 know which pseudo reg to use.
256 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
257 argue it is not. The number of iterations for the algorithm to converge
258 is typically 2-4 so I don't view it as that expensive (relatively speaking).
260 PRE GCSE depends heavily on the second CSE pass to clean up the copies
261 we create. To make an expression reach the place where it's redundant,
262 the result of the expression is copied to a new register, and the redundant
263 expression is deleted by replacing it with this new register. Classic GCSE
264 doesn't have this problem as much as it computes the reaching defs of
265 each register in each block and thus can try to use an existing register.
267 **********************
269 A fair bit of simplicity is created by creating small functions for simple
270 tasks, even when the function is only called in one place. This may
271 measurably slow things down [or may not] by creating more function call
272 overhead than is necessary. The source is laid out so that it's trivial
273 to make the affected functions inline so that one can measure what speed
274 up, if any, can be achieved, and maybe later when things settle things can
277 Help stamp out big monolithic functions! */
279 /* GCSE global vars. */
282 static FILE *gcse_file
;
284 /* Note whether or not we should run jump optimization after gcse. We
285 want to do this for two cases.
287 * If we changed any jumps via cprop.
289 * If we added any labels via edge splitting. */
291 static int run_jump_opt_after_gcse
;
293 /* Bitmaps are normally not included in debugging dumps.
294 However it's useful to be able to print them from GDB.
295 We could create special functions for this, but it's simpler to
296 just allow passing stderr to the dump_foo fns. Since stderr can
297 be a macro, we store a copy here. */
298 static FILE *debug_stderr
;
300 /* An obstack for our working variables. */
301 static struct obstack gcse_obstack
;
303 /* Non-zero for each mode that supports (set (reg) (reg)).
304 This is trivially true for integer and floating point values.
305 It may or may not be true for condition codes. */
306 static char can_copy_p
[(int) NUM_MACHINE_MODES
];
308 /* Non-zero if can_copy_p has been initialized. */
309 static int can_copy_init_p
;
311 struct reg_use
{rtx reg_rtx
; };
313 /* Hash table of expressions. */
317 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
319 /* Index in the available expression bitmaps. */
321 /* Next entry with the same hash. */
322 struct expr
*next_same_hash
;
323 /* List of anticipatable occurrences in basic blocks in the function.
324 An "anticipatable occurrence" is one that is the first occurrence in the
325 basic block, the operands are not modified in the basic block prior
326 to the occurrence and the output is not used between the start of
327 the block and the occurrence. */
328 struct occr
*antic_occr
;
329 /* List of available occurrences in basic blocks in the function.
330 An "available occurrence" is one that is the last occurrence in the
331 basic block and the operands are not modified by following statements in
332 the basic block [including this insn]. */
333 struct occr
*avail_occr
;
334 /* Non-null if the computation is PRE redundant.
335 The value is the newly created pseudo-reg to record a copy of the
336 expression in all the places that reach the redundant copy. */
340 /* Occurrence of an expression.
341 There is one per basic block. If a pattern appears more than once the
342 last appearance is used [or first for anticipatable expressions]. */
346 /* Next occurrence of this expression. */
348 /* The insn that computes the expression. */
350 /* Non-zero if this [anticipatable] occurrence has been deleted. */
352 /* Non-zero if this [available] occurrence has been copied to
354 /* ??? This is mutually exclusive with deleted_p, so they could share
359 /* Expression and copy propagation hash tables.
360 Each hash table is an array of buckets.
361 ??? It is known that if it were an array of entries, structure elements
362 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
363 not clear whether in the final analysis a sufficient amount of memory would
364 be saved as the size of the available expression bitmaps would be larger
365 [one could build a mapping table without holes afterwards though].
366 Someday I'll perform the computation and figure it out. */
/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The expression hash table: an array of `expr_hash_table_size'
   bucket heads.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static unsigned int set_hash_table_size;

/* The copy propagation hash table: an array of `set_hash_table_size'
   bucket heads.  */
static struct expr **set_hash_table;
/* Mapping of insn UIDs to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;
386 /* Highest UID in UID_CUID. */
389 /* Get the cuid of an insn. */
390 #ifdef ENABLE_CHECKING
391 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
393 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
396 /* Number of cuids. */
399 /* Mapping of cuids to insns. */
400 static rtx
*cuid_insn
;
402 /* Get insn from cuid. */
403 #define CUID_INSN(CUID) (cuid_insn[CUID])
/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;
410 /* Maximum number of cse-able expressions found. */
413 /* Maximum number of assignments for copy propagation found. */
416 /* Table of registers that are modified.
418 For each register, each element is a list of places where the pseudo-reg
421 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
422 requires knowledge of which blocks kill which regs [and thus could use
423 a bitmap instead of the lists `reg_set_table' uses].
425 `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
426 num-regs) [however perhaps it may be useful to keep the data as is]. One
427 advantage of recording things this way is that `reg_set_table' is fairly
428 sparse with respect to pseudo regs but for hard regs could be fairly dense
429 [relatively speaking]. And recording sets of pseudo-regs in lists speeds
430 up functions like compute_transp since in the case of pseudo-regs we only
431 need to iterate over the number of times a pseudo-reg is set, not over the
432 number of basic blocks [clearly there is a bit of a slow down in the cases
433 where a pseudo is set more than once in a block, however it is believed
434 that the net effect is to speed things up]. This isn't done for hard-regs
435 because recording call-clobbered hard-regs in `reg_set_table' at each
436 function call can consume a fair bit of memory, and iterating over
437 hard-regs stored this way in compute_transp will be more expensive. */
439 typedef struct reg_set
441 /* The next setting of this register. */
442 struct reg_set
*next
;
443 /* The insn where it was set. */
447 static reg_set
**reg_set_table
;
/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
457 /* Bitmap containing one bit for each register in the program.
458 Used when performing GCSE to track which registers have been set since
459 the start of the basic block. */
460 static sbitmap reg_set_bitmap
;
462 /* For each block, a bitmap of registers set in the block.
463 This is used by expr_killed_p and compute_transp.
464 It is computed during hash table computation and not by compute_sets
465 as it includes registers added since the last pass (or between cprop and
466 gcse) and it's currently not easy to realloc sbitmap vectors. */
467 static sbitmap
*reg_set_in_block
;
469 /* For each block, non-zero if memory is set in that block.
470 This is computed during hash table computation and is used by
471 expr_killed_p and compute_transp.
472 ??? Handling of memory is very simple, we don't make any attempt
473 to optimize things (later).
474 ??? This can be computed by compute_sets since the information
476 static char *mem_set_in_block
;
/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;
494 /* These variables are used by classic GCSE.
495 Normally they'd be defined a bit later, but `rd_gen' needs to
496 be declared sooner. */
498 /* Each block has a bitmap of each type.
499 The length of each block's bitmap is:
501 max_cuid - for reaching definitions
502 n_exprs - for available expressions
504 Thus we view the bitmaps as 2 dimensional arrays. i.e.
505 rd_kill[block_num][cuid_num]
506 ae_kill[block_num][expr_num] */
508 /* For reaching defs */
509 static sbitmap
*rd_kill
, *rd_gen
, *reaching_defs
, *rd_out
;
511 /* for available exprs */
512 static sbitmap
*ae_kill
, *ae_gen
, *ae_in
, *ae_out
;
514 /* Objects of this type are passed around by the null-pointer check
516 struct null_pointer_info
518 /* The basic block being processed. */
520 /* The first register to be handled in this pass. */
521 unsigned int min_reg
;
522 /* One greater than the last register to be handled in this pass. */
523 unsigned int max_reg
;
524 sbitmap
*nonnull_local
;
525 sbitmap
*nonnull_killed
;
528 static void compute_can_copy
PARAMS ((void));
529 static char *gmalloc
PARAMS ((unsigned int));
530 static char *grealloc
PARAMS ((char *, unsigned int));
531 static char *gcse_alloc
PARAMS ((unsigned long));
532 static void alloc_gcse_mem
PARAMS ((rtx
));
533 static void free_gcse_mem
PARAMS ((void));
534 static void alloc_reg_set_mem
PARAMS ((int));
535 static void free_reg_set_mem
PARAMS ((void));
536 static int get_bitmap_width
PARAMS ((int, int, int));
537 static void record_one_set
PARAMS ((int, rtx
));
538 static void record_set_info
PARAMS ((rtx
, rtx
, void *));
539 static void compute_sets
PARAMS ((rtx
));
540 static void hash_scan_insn
PARAMS ((rtx
, int, int));
541 static void hash_scan_set
PARAMS ((rtx
, rtx
, int));
542 static void hash_scan_clobber
PARAMS ((rtx
, rtx
));
543 static void hash_scan_call
PARAMS ((rtx
, rtx
));
544 static int want_to_gcse_p
PARAMS ((rtx
));
545 static int oprs_unchanged_p
PARAMS ((rtx
, rtx
, int));
546 static int oprs_anticipatable_p
PARAMS ((rtx
, rtx
));
547 static int oprs_available_p
PARAMS ((rtx
, rtx
));
548 static void insert_expr_in_table
PARAMS ((rtx
, enum machine_mode
, rtx
,
550 static void insert_set_in_table
PARAMS ((rtx
, rtx
));
551 static unsigned int hash_expr
PARAMS ((rtx
, enum machine_mode
, int *, int));
552 static unsigned int hash_expr_1
PARAMS ((rtx
, enum machine_mode
, int *));
553 static unsigned int hash_string_1
PARAMS ((const char *));
554 static unsigned int hash_set
PARAMS ((int, int));
555 static int expr_equiv_p
PARAMS ((rtx
, rtx
));
556 static void record_last_reg_set_info
PARAMS ((rtx
, int));
557 static void record_last_mem_set_info
PARAMS ((rtx
));
558 static void record_last_set_info
PARAMS ((rtx
, rtx
, void *));
559 static void compute_hash_table
PARAMS ((int));
560 static void alloc_set_hash_table
PARAMS ((int));
561 static void free_set_hash_table
PARAMS ((void));
562 static void compute_set_hash_table
PARAMS ((void));
563 static void alloc_expr_hash_table
PARAMS ((unsigned int));
564 static void free_expr_hash_table
PARAMS ((void));
565 static void compute_expr_hash_table
PARAMS ((void));
566 static void dump_hash_table
PARAMS ((FILE *, const char *, struct expr
**,
568 static struct expr
*lookup_expr
PARAMS ((rtx
));
569 static struct expr
*lookup_set
PARAMS ((unsigned int, rtx
));
570 static struct expr
*next_set
PARAMS ((unsigned int, struct expr
*));
571 static void reset_opr_set_tables
PARAMS ((void));
572 static int oprs_not_set_p
PARAMS ((rtx
, rtx
));
573 static void mark_call
PARAMS ((rtx
));
574 static void mark_set
PARAMS ((rtx
, rtx
));
575 static void mark_clobber
PARAMS ((rtx
, rtx
));
576 static void mark_oprs_set
PARAMS ((rtx
));
577 static void alloc_cprop_mem
PARAMS ((int, int));
578 static void free_cprop_mem
PARAMS ((void));
579 static void compute_transp
PARAMS ((rtx
, int, sbitmap
*, int));
580 static void compute_transpout
PARAMS ((void));
581 static void compute_local_properties
PARAMS ((sbitmap
*, sbitmap
*, sbitmap
*,
583 static void compute_cprop_data
PARAMS ((void));
584 static void find_used_regs
PARAMS ((rtx
));
585 static int try_replace_reg
PARAMS ((rtx
, rtx
, rtx
));
586 static struct expr
*find_avail_set
PARAMS ((int, rtx
));
587 static int cprop_jump
PARAMS ((rtx
, rtx
, struct reg_use
*, rtx
));
589 static int cprop_cc0_jump
PARAMS ((rtx
, struct reg_use
*, rtx
));
591 static int cprop_insn
PARAMS ((rtx
, int));
592 static int cprop
PARAMS ((int));
593 static int one_cprop_pass
PARAMS ((int, int));
594 static void alloc_pre_mem
PARAMS ((int, int));
595 static void free_pre_mem
PARAMS ((void));
596 static void compute_pre_data
PARAMS ((void));
597 static int pre_expr_reaches_here_p
PARAMS ((int, struct expr
*, int));
598 static void insert_insn_end_bb
PARAMS ((struct expr
*, int, int));
599 static void pre_insert_copy_insn
PARAMS ((struct expr
*, rtx
));
600 static void pre_insert_copies
PARAMS ((void));
601 static int pre_delete
PARAMS ((void));
602 static int pre_gcse
PARAMS ((void));
603 static int one_pre_gcse_pass
PARAMS ((int));
604 static void add_label_notes
PARAMS ((rtx
, rtx
));
605 static void alloc_code_hoist_mem
PARAMS ((int, int));
606 static void free_code_hoist_mem
PARAMS ((void));
607 static void compute_code_hoist_vbeinout
PARAMS ((void));
608 static void compute_code_hoist_data
PARAMS ((void));
609 static int hoist_expr_reaches_here_p
PARAMS ((int, int, int, char *));
610 static void hoist_code
PARAMS ((void));
611 static int one_code_hoisting_pass
PARAMS ((void));
612 static void alloc_rd_mem
PARAMS ((int, int));
613 static void free_rd_mem
PARAMS ((void));
614 static void handle_rd_kill_set
PARAMS ((rtx
, int, int));
615 static void compute_kill_rd
PARAMS ((void));
616 static void compute_rd
PARAMS ((void));
617 static void alloc_avail_expr_mem
PARAMS ((int, int));
618 static void free_avail_expr_mem
PARAMS ((void));
619 static void compute_ae_gen
PARAMS ((void));
620 static int expr_killed_p
PARAMS ((rtx
, int));
621 static void compute_ae_kill
PARAMS ((sbitmap
*, sbitmap
*));
622 static int expr_reaches_here_p
PARAMS ((struct occr
*, struct expr
*,
624 static rtx computing_insn
PARAMS ((struct expr
*, rtx
));
625 static int def_reaches_here_p
PARAMS ((rtx
, rtx
));
626 static int can_disregard_other_sets
PARAMS ((struct reg_set
**, rtx
, int));
627 static int handle_avail_expr
PARAMS ((rtx
, struct expr
*));
628 static int classic_gcse
PARAMS ((void));
629 static int one_classic_gcse_pass
PARAMS ((int));
630 static void invalidate_nonnull_info
PARAMS ((rtx
, rtx
, void *));
631 static void delete_null_pointer_checks_1
PARAMS ((varray_type
*, unsigned int *,
632 sbitmap
*, sbitmap
*,
633 struct null_pointer_info
*));
634 static rtx process_insert_insn
PARAMS ((struct expr
*));
635 static int pre_edge_insert
PARAMS ((struct edge_list
*, struct expr
**));
636 static int expr_reaches_here_p_work
PARAMS ((struct occr
*, struct expr
*,
638 static int pre_expr_reaches_here_p_work
PARAMS ((int, struct expr
*,
641 /* Entry point for global common subexpression elimination.
642 F is the first instruction in the function. */
650 /* Bytes used at start of pass. */
651 int initial_bytes_used
;
652 /* Maximum number of bytes used by a pass. */
654 /* Point to release obstack data from for each pass. */
655 char *gcse_obstack_bottom
;
657 /* We do not construct an accurate cfg in functions which call
658 setjmp, so just punt to be safe. */
659 if (current_function_calls_setjmp
)
662 /* Assume that we do not need to run jump optimizations after gcse. */
663 run_jump_opt_after_gcse
= 0;
665 /* For calling dump_foo fns from gdb. */
666 debug_stderr
= stderr
;
669 /* Identify the basic block information for this function, including
670 successors and predecessors. */
671 max_gcse_regno
= max_reg_num ();
674 dump_flow_info (file
);
676 /* Return if there's nothing to do. */
677 if (n_basic_blocks
<= 1)
680 /* Trying to perform global optimizations on flow graphs which have
681 a high connectivity will take a long time and is unlikely to be
684 In normal circumstances a cfg should have about twice as many edges
685 as blocks. But we do not want to punish small functions which have
686 a couple switch statements. So we require a relatively large number
687 of basic blocks and the ratio of edges to blocks to be high. */
688 if (n_basic_blocks
> 1000 && n_edges
/ n_basic_blocks
>= 20)
690 if (warn_disabled_optimization
)
691 warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
692 n_basic_blocks
, n_edges
/ n_basic_blocks
);
696 /* If allocating memory for the cprop bitmap would take up too much
697 storage it's better just to disable the optimization. */
699 * SBITMAP_SET_SIZE (max_gcse_regno
)
700 * sizeof (SBITMAP_ELT_TYPE
)) > MAX_GCSE_MEMORY
)
702 if (warn_disabled_optimization
)
703 warning ("GCSE disabled: %d basic blocks and %d registers",
704 n_basic_blocks
, max_gcse_regno
);
709 /* See what modes support reg/reg copy operations. */
710 if (! can_copy_init_p
)
716 gcc_obstack_init (&gcse_obstack
);
719 /* Record where pseudo-registers are set. This data is kept accurate
720 during each pass. ??? We could also record hard-reg information here
721 [since it's unchanging], however it is currently done during hash table
724 It may be tempting to compute MEM set information here too, but MEM sets
725 will be subject to code motion one day and thus we need to compute
726 information about memory sets when we build the hash tables. */
728 alloc_reg_set_mem (max_gcse_regno
);
732 initial_bytes_used
= bytes_used
;
734 gcse_obstack_bottom
= gcse_alloc (1);
736 while (changed
&& pass
< MAX_PASSES
)
740 fprintf (file
, "GCSE pass %d\n\n", pass
+ 1);
742 /* Initialize bytes_used to the space for the pred/succ lists,
743 and the reg_set_table data. */
744 bytes_used
= initial_bytes_used
;
746 /* Each pass may create new registers, so recalculate each time. */
747 max_gcse_regno
= max_reg_num ();
751 /* Don't allow constant propagation to modify jumps
753 changed
= one_cprop_pass (pass
+ 1, 0);
756 changed
|= one_classic_gcse_pass (pass
+ 1);
759 changed
|= one_pre_gcse_pass (pass
+ 1);
761 alloc_reg_set_mem (max_reg_num ());
763 run_jump_opt_after_gcse
= 1;
766 if (max_pass_bytes
< bytes_used
)
767 max_pass_bytes
= bytes_used
;
769 /* Free up memory, then reallocate for code hoisting. We can
770 not re-use the existing allocated memory because the tables
771 will not have info for the insns or registers created by
772 partial redundancy elimination. */
775 /* It does not make sense to run code hoisting unless we are optimizing
776 for code size -- it rarely makes programs faster, and can make
777 them bigger if we did partial redundancy elimination (when optimizing
778 for space, we use a classic gcse algorithm instead of partial
779 redundancy algorithms). */
782 max_gcse_regno
= max_reg_num ();
784 changed
|= one_code_hoisting_pass ();
787 if (max_pass_bytes
< bytes_used
)
788 max_pass_bytes
= bytes_used
;
793 fprintf (file
, "\n");
797 obstack_free (&gcse_obstack
, gcse_obstack_bottom
);
801 /* Do one last pass of copy propagation, including cprop into
802 conditional jumps. */
804 max_gcse_regno
= max_reg_num ();
806 /* This time, go ahead and allow cprop to alter jumps. */
807 one_cprop_pass (pass
+ 1, 1);
812 fprintf (file
, "GCSE of %s: %d basic blocks, ",
813 current_function_name
, n_basic_blocks
);
814 fprintf (file
, "%d pass%s, %d bytes\n\n",
815 pass
, pass
> 1 ? "es" : "", max_pass_bytes
);
818 obstack_free (&gcse_obstack
, NULL_PTR
);
820 return run_jump_opt_after_gcse
;
823 /* Misc. utilities. */
825 /* Compute which modes support reg/reg copy operations. */
831 #ifndef AVOID_CCMODE_COPIES
834 memset (can_copy_p
, 0, NUM_MACHINE_MODES
);
837 for (i
= 0; i
< NUM_MACHINE_MODES
; i
++)
838 if (GET_MODE_CLASS (i
) == MODE_CC
)
840 #ifdef AVOID_CCMODE_COPIES
843 reg
= gen_rtx_REG ((enum machine_mode
) i
, LAST_VIRTUAL_REGISTER
+ 1);
844 insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, reg
));
845 if (recog (PATTERN (insn
), insn
, NULL_PTR
) >= 0)
855 /* Cover function to xmalloc to record bytes allocated. */
862 return xmalloc (size
);
865 /* Cover function to xrealloc.
866 We don't record the additional size since we don't know it.
867 It won't affect memory usage stats much anyway. */
874 return xrealloc (ptr
, size
);
877 /* Cover function to obstack_alloc.
878 We don't need to record the bytes allocated here since
879 obstack_chunk_alloc is set to gmalloc. */
885 return (char *) obstack_alloc (&gcse_obstack
, size
);
888 /* Allocate memory for the cuid mapping array,
889 and reg/memory set tracking tables.
891 This is called at the start of each pass. */
900 /* Find the largest UID and create a mapping from UIDs to CUIDs.
901 CUIDs are like UIDs except they increase monotonically, have no gaps,
902 and only apply to real insns. */
904 max_uid
= get_max_uid ();
905 n
= (max_uid
+ 1) * sizeof (int);
906 uid_cuid
= (int *) gmalloc (n
);
907 memset ((char *) uid_cuid
, 0, n
);
908 for (insn
= f
, i
= 0; insn
; insn
= NEXT_INSN (insn
))
911 uid_cuid
[INSN_UID (insn
)] = i
++;
913 uid_cuid
[INSN_UID (insn
)] = i
;
916 /* Create a table mapping cuids to insns. */
919 n
= (max_cuid
+ 1) * sizeof (rtx
);
920 cuid_insn
= (rtx
*) gmalloc (n
);
921 memset ((char *) cuid_insn
, 0, n
);
922 for (insn
= f
, i
= 0; insn
; insn
= NEXT_INSN (insn
))
924 CUID_INSN (i
++) = insn
;
926 /* Allocate vars to track sets of regs. */
927 reg_set_bitmap
= (sbitmap
) sbitmap_alloc (max_gcse_regno
);
929 /* Allocate vars to track sets of regs, memory per block. */
930 reg_set_in_block
= (sbitmap
*) sbitmap_vector_alloc (n_basic_blocks
,
932 mem_set_in_block
= (char *) gmalloc (n_basic_blocks
);
935 /* Free memory allocated by alloc_gcse_mem. */
943 free (reg_set_bitmap
);
945 free (reg_set_in_block
);
946 free (mem_set_in_block
);
949 /* Many of the global optimization algorithms work by solving dataflow
950 equations for various expressions. Initially, some local value is
951 computed for each expression in each block. Then, the values across the
952 various blocks are combined (by following flow graph edges) to arrive at
953 global values. Conceptually, each set of equations is independent. We
954 may therefore solve all the equations in parallel, solve them one at a
955 time, or pick any intermediate approach.
957 When you're going to need N two-dimensional bitmaps, each X (say, the
958 number of blocks) by Y (say, the number of expressions), call this
959 function. It's not important what X and Y represent; only that Y
960 correspond to the things that can be done in parallel. This function will
961 return an appropriate chunking factor C; you should solve C sets of
962 equations in parallel. By going through this function, we can easily
963 trade space against time; by solving fewer equations in parallel we use
967 get_bitmap_width (n
, x
, y
)
972 /* It's not really worth figuring out *exactly* how much memory will
973 be used by a particular choice. The important thing is to get
974 something approximately right. */
975 size_t max_bitmap_memory
= 10 * 1024 * 1024;
977 /* The number of bytes we'd use for a single column of minimum
979 size_t column_size
= n
* x
* sizeof (SBITMAP_ELT_TYPE
);
981 /* Often, it's reasonable just to solve all the equations in
983 if (column_size
* SBITMAP_SET_SIZE (y
) <= max_bitmap_memory
)
986 /* Otherwise, pick the largest width we can, without going over the
988 return SBITMAP_ELT_BITS
* ((max_bitmap_memory
+ column_size
- 1)
992 /* Compute the local properties of each recorded expression.
994 Local properties are those that are defined by the block, irrespective of
997 An expression is transparent in a block if its operands are not modified
1000 An expression is computed (locally available) in a block if it is computed
1001 at least once and expression would contain the same value if the
1002 computation was moved to the end of the block.
1004 An expression is locally anticipatable in a block if it is computed at
1005 least once and expression would contain the same value if the computation
1006 was moved to the beginning of the block.
1008 We call this routine for cprop, pre and code hoisting. They all compute
1009 basically the same information and thus can easily share this code.
1011 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1012 properties. If NULL, then it is not necessary to compute or record that
1013 particular property.
1015 SETP controls which hash table to look at. If zero, this routine looks at
1016 the expr hash table; if nonzero this routine looks at the set hash table.
1017 Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1021 compute_local_properties (transp
, comp
, antloc
, setp
)
1027 unsigned int i
, hash_table_size
;
1028 struct expr
**hash_table
;
1030 /* Initialize any bitmaps that were passed in. */
1034 sbitmap_vector_zero (transp
, n_basic_blocks
);
1036 sbitmap_vector_ones (transp
, n_basic_blocks
);
1040 sbitmap_vector_zero (comp
, n_basic_blocks
);
1042 sbitmap_vector_zero (antloc
, n_basic_blocks
);
1044 /* We use the same code for cprop, pre and hoisting. For cprop
1045 we care about the set hash table, for pre and hoisting we
1046 care about the expr hash table. */
1047 hash_table_size
= setp
? set_hash_table_size
: expr_hash_table_size
;
1048 hash_table
= setp
? set_hash_table
: expr_hash_table
;
1050 for (i
= 0; i
< hash_table_size
; i
++)
1054 for (expr
= hash_table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
1056 int indx
= expr
->bitmap_index
;
1059 /* The expression is transparent in this block if it is not killed.
1060 We start by assuming all are transparent [none are killed], and
1061 then reset the bits for those that are. */
1063 compute_transp (expr
->expr
, indx
, transp
, setp
);
1065 /* The occurrences recorded in antic_occr are exactly those that
1066 we want to set to non-zero in ANTLOC. */
1068 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
1070 SET_BIT (antloc
[BLOCK_NUM (occr
->insn
)], indx
);
1072 /* While we're scanning the table, this is a good place to
1074 occr
->deleted_p
= 0;
1077 /* The occurrences recorded in avail_occr are exactly those that
1078 we want to set to non-zero in COMP. */
1080 for (occr
= expr
->avail_occr
; occr
!= NULL
; occr
= occr
->next
)
1082 SET_BIT (comp
[BLOCK_NUM (occr
->insn
)], indx
);
1084 /* While we're scanning the table, this is a good place to
1089 /* While we're scanning the table, this is a good place to
1091 expr
->reaching_reg
= 0;
1096 /* Register set information.
1098 `reg_set_table' records where each register is set or otherwise
1101 static struct obstack reg_set_obstack
;
1104 alloc_reg_set_mem (n_regs
)
1109 reg_set_table_size
= n_regs
+ REG_SET_TABLE_SLOP
;
1110 n
= reg_set_table_size
* sizeof (struct reg_set
*);
1111 reg_set_table
= (struct reg_set
**) gmalloc (n
);
1112 memset ((char *) reg_set_table
, 0, n
);
1114 gcc_obstack_init (®_set_obstack
);
1120 free (reg_set_table
);
1121 obstack_free (®_set_obstack
, NULL_PTR
);
1124 /* Record REGNO in the reg_set table. */
1127 record_one_set (regno
, insn
)
1131 /* allocate a new reg_set element and link it onto the list */
1132 struct reg_set
*new_reg_info
;
1134 /* If the table isn't big enough, enlarge it. */
1135 if (regno
>= reg_set_table_size
)
1137 int new_size
= regno
+ REG_SET_TABLE_SLOP
;
1140 = (struct reg_set
**) grealloc ((char *) reg_set_table
,
1141 new_size
* sizeof (struct reg_set
*));
1142 memset ((char *) (reg_set_table
+ reg_set_table_size
), 0,
1143 (new_size
- reg_set_table_size
) * sizeof (struct reg_set
*));
1144 reg_set_table_size
= new_size
;
1147 new_reg_info
= (struct reg_set
*) obstack_alloc (®_set_obstack
,
1148 sizeof (struct reg_set
));
1149 bytes_used
+= sizeof (struct reg_set
);
1150 new_reg_info
->insn
= insn
;
1151 new_reg_info
->next
= reg_set_table
[regno
];
1152 reg_set_table
[regno
] = new_reg_info
;
1155 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1156 an insn. The DATA is really the instruction in which the SET is
1160 record_set_info (dest
, setter
, data
)
1161 rtx dest
, setter ATTRIBUTE_UNUSED
;
1164 rtx record_set_insn
= (rtx
) data
;
1166 if (GET_CODE (dest
) == REG
&& REGNO (dest
) >= FIRST_PSEUDO_REGISTER
)
1167 record_one_set (REGNO (dest
), record_set_insn
);
1170 /* Scan the function and record each set of each pseudo-register.
1172 This is called once, at the start of the gcse pass. See the comments for
1173 `reg_set_table' for further documenation. */
1181 for (insn
= f
; insn
!= 0; insn
= NEXT_INSN (insn
))
1183 note_stores (PATTERN (insn
), record_set_info
, insn
);
1186 /* Hash table support. */
1188 /* For each register, the cuid of the first/last insn in the block to set it,
1189 or -1 if not set. */
1190 #define NEVER_SET -1
1191 static int *reg_first_set
;
1192 static int *reg_last_set
;
1194 /* While computing "first/last set" info, this is the CUID of first/last insn
1195 to set memory or -1 if not set. `mem_last_set' is also used when
1196 performing GCSE to record whether memory has been set since the beginning
1199 Note that handling of memory is very simple, we don't make any attempt
1200 to optimize things (later). */
1201 static int mem_first_set
;
1202 static int mem_last_set
;
1204 /* Perform a quick check whether X, the source of a set, is something
1205 we want to consider for GCSE. */
1211 switch (GET_CODE (x
))
1227 /* Return non-zero if the operands of expression X are unchanged from the
1228 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1229 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1232 oprs_unchanged_p (x
, insn
, avail_p
)
1243 code
= GET_CODE (x
);
1248 return (reg_last_set
[REGNO (x
)] == NEVER_SET
1249 || reg_last_set
[REGNO (x
)] < INSN_CUID (insn
));
1251 return (reg_first_set
[REGNO (x
)] == NEVER_SET
1252 || reg_first_set
[REGNO (x
)] >= INSN_CUID (insn
));
1255 if (avail_p
&& mem_last_set
!= NEVER_SET
1256 && mem_last_set
>= INSN_CUID (insn
))
1258 else if (! avail_p
&& mem_first_set
!= NEVER_SET
1259 && mem_first_set
< INSN_CUID (insn
))
1262 return oprs_unchanged_p (XEXP (x
, 0), insn
, avail_p
);
1287 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
1291 /* If we are about to do the last recursive call needed at this
1292 level, change it into iteration. This function is called enough
1295 return oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
);
1297 else if (! oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
))
1300 else if (fmt
[i
] == 'E')
1301 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1302 if (! oprs_unchanged_p (XVECEXP (x
, i
, j
), insn
, avail_p
))
1309 /* Return non-zero if the operands of expression X are unchanged from
1310 the start of INSN's basic block up to but not including INSN. */
1313 oprs_anticipatable_p (x
, insn
)
1316 return oprs_unchanged_p (x
, insn
, 0);
1319 /* Return non-zero if the operands of expression X are unchanged from
1320 INSN to the end of INSN's basic block. */
1323 oprs_available_p (x
, insn
)
1326 return oprs_unchanged_p (x
, insn
, 1);
1329 /* Hash expression X.
1331 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1332 indicating if a volatile operand is found or if the expression contains
1333 something we don't want to insert in the table.
1335 ??? One might want to merge this with canon_hash. Later. */
1338 hash_expr (x
, mode
, do_not_record_p
, hash_table_size
)
1340 enum machine_mode mode
;
1341 int *do_not_record_p
;
1342 int hash_table_size
;
1346 *do_not_record_p
= 0;
1348 hash
= hash_expr_1 (x
, mode
, do_not_record_p
);
1349 return hash
% hash_table_size
;
/* Hash a string.  Just add its bytes up.  A NULL string hashes to 0.  */

static inline unsigned
hash_string_1 (ps)
     const char *ps;
{
  unsigned hash = 0;
  const unsigned char *p = (const unsigned char *)ps;

  if (p)
    while (*p)
      hash += *p++;

  return hash;
}
1366 /* Subroutine of hash_expr to do the actual work. */
1369 hash_expr_1 (x
, mode
, do_not_record_p
)
1371 enum machine_mode mode
;
1372 int *do_not_record_p
;
1379 /* Used to turn recursion into iteration. We can't rely on GCC's
1380 tail-recursion eliminatio since we need to keep accumulating values
1387 code
= GET_CODE (x
);
1391 hash
+= ((unsigned int) REG
<< 7) + REGNO (x
);
1395 hash
+= (((unsigned int) CONST_INT
<< 7) + (unsigned int) mode
1396 + (unsigned int) INTVAL (x
));
1400 /* This is like the general case, except that it only counts
1401 the integers representing the constant. */
1402 hash
+= (unsigned int) code
+ (unsigned int) GET_MODE (x
);
1403 if (GET_MODE (x
) != VOIDmode
)
1404 for (i
= 2; i
< GET_RTX_LENGTH (CONST_DOUBLE
); i
++)
1405 hash
+= (unsigned int) XWINT (x
, i
);
1407 hash
+= ((unsigned int) CONST_DOUBLE_LOW (x
)
1408 + (unsigned int) CONST_DOUBLE_HIGH (x
));
1411 /* Assume there is only one rtx object for any given label. */
1413 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1414 differences and differences between each stage's debugging dumps. */
1415 hash
+= (((unsigned int) LABEL_REF
<< 7)
1416 + CODE_LABEL_NUMBER (XEXP (x
, 0)));
1421 /* Don't hash on the symbol's address to avoid bootstrap differences.
1422 Different hash values may cause expressions to be recorded in
1423 different orders and thus different registers to be used in the
1424 final assembler. This also avoids differences in the dump files
1425 between various stages. */
1427 const unsigned char *p
= (const unsigned char *) XSTR (x
, 0);
1430 h
+= (h
<< 7) + *p
++; /* ??? revisit */
1432 hash
+= ((unsigned int) SYMBOL_REF
<< 7) + h
;
1437 if (MEM_VOLATILE_P (x
))
1439 *do_not_record_p
= 1;
1443 hash
+= (unsigned int) MEM
;
1444 hash
+= MEM_ALIAS_SET (x
);
1455 case UNSPEC_VOLATILE
:
1456 *do_not_record_p
= 1;
1460 if (MEM_VOLATILE_P (x
))
1462 *do_not_record_p
= 1;
1467 /* We don't want to take the filename and line into account. */
1468 hash
+= (unsigned) code
+ (unsigned) GET_MODE (x
)
1469 + hash_string_1 (ASM_OPERANDS_TEMPLATE (x
))
1470 + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x
))
1471 + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x
);
1473 if (ASM_OPERANDS_INPUT_LENGTH (x
))
1475 for (i
= 1; i
< ASM_OPERANDS_INPUT_LENGTH (x
); i
++)
1477 hash
+= (hash_expr_1 (ASM_OPERANDS_INPUT (x
, i
),
1478 GET_MODE (ASM_OPERANDS_INPUT (x
, i
)),
1480 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
1484 hash
+= hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x
, 0));
1485 x
= ASM_OPERANDS_INPUT (x
, 0);
1486 mode
= GET_MODE (x
);
1496 hash
+= (unsigned) code
+ (unsigned) GET_MODE (x
);
1497 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
1501 /* If we are about to do the last recursive call
1502 needed at this level, change it into iteration.
1503 This function is called enough to be worth it. */
1510 hash
+= hash_expr_1 (XEXP (x
, i
), 0, do_not_record_p
);
1511 if (*do_not_record_p
)
1515 else if (fmt
[i
] == 'E')
1516 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1518 hash
+= hash_expr_1 (XVECEXP (x
, i
, j
), 0, do_not_record_p
);
1519 if (*do_not_record_p
)
1523 else if (fmt
[i
] == 's')
1524 hash
+= hash_string_1 (XSTR (x
, i
));
1525 else if (fmt
[i
] == 'i')
1526 hash
+= (unsigned int) XINT (x
, i
);
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This simplifies the PRE copy
   propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}
1552 /* Return non-zero if exp1 is equivalent to exp2.
1553 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1560 register enum rtx_code code
;
1561 register const char *fmt
;
1566 if (x
== 0 || y
== 0)
1569 code
= GET_CODE (x
);
1570 if (code
!= GET_CODE (y
))
1573 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1574 if (GET_MODE (x
) != GET_MODE (y
))
1584 return INTVAL (x
) == INTVAL (y
);
1587 return XEXP (x
, 0) == XEXP (y
, 0);
1590 return XSTR (x
, 0) == XSTR (y
, 0);
1593 return REGNO (x
) == REGNO (y
);
1596 /* Can't merge two expressions in different alias sets, since we can
1597 decide that the expression is transparent in a block when it isn't,
1598 due to it being set with the different alias set. */
1599 if (MEM_ALIAS_SET (x
) != MEM_ALIAS_SET (y
))
1603 /* For commutative operations, check both orders. */
1611 return ((expr_equiv_p (XEXP (x
, 0), XEXP (y
, 0))
1612 && expr_equiv_p (XEXP (x
, 1), XEXP (y
, 1)))
1613 || (expr_equiv_p (XEXP (x
, 0), XEXP (y
, 1))
1614 && expr_equiv_p (XEXP (x
, 1), XEXP (y
, 0))));
1617 /* We don't use the generic code below because we want to
1618 disregard filename and line numbers. */
1620 /* A volatile asm isn't equivalent to any other. */
1621 if (MEM_VOLATILE_P (x
) || MEM_VOLATILE_P (y
))
1624 if (GET_MODE (x
) != GET_MODE (y
)
1625 || strcmp (ASM_OPERANDS_TEMPLATE (x
), ASM_OPERANDS_TEMPLATE (y
))
1626 || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x
),
1627 ASM_OPERANDS_OUTPUT_CONSTRAINT (y
))
1628 || ASM_OPERANDS_OUTPUT_IDX (x
) != ASM_OPERANDS_OUTPUT_IDX (y
)
1629 || ASM_OPERANDS_INPUT_LENGTH (x
) != ASM_OPERANDS_INPUT_LENGTH (y
))
1632 if (ASM_OPERANDS_INPUT_LENGTH (x
))
1634 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; i
--)
1635 if (! expr_equiv_p (ASM_OPERANDS_INPUT (x
, i
),
1636 ASM_OPERANDS_INPUT (y
, i
))
1637 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x
, i
),
1638 ASM_OPERANDS_INPUT_CONSTRAINT (y
, i
)))
1648 /* Compare the elements. If any pair of corresponding elements
1649 fail to match, return 0 for the whole thing. */
1651 fmt
= GET_RTX_FORMAT (code
);
1652 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
1657 if (! expr_equiv_p (XEXP (x
, i
), XEXP (y
, i
)))
1662 if (XVECLEN (x
, i
) != XVECLEN (y
, i
))
1664 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1665 if (! expr_equiv_p (XVECEXP (x
, i
, j
), XVECEXP (y
, i
, j
)))
1670 if (strcmp (XSTR (x
, i
), XSTR (y
, i
)))
1675 if (XINT (x
, i
) != XINT (y
, i
))
1680 if (XWINT (x
, i
) != XWINT (y
, i
))
1695 /* Insert expression X in INSN in the hash table.
1696 If it is already present, record it as the last occurrence in INSN's
1699 MODE is the mode of the value X is being stored into.
1700 It is only used if X is a CONST_INT.
1702 ANTIC_P is non-zero if X is an anticipatable expression.
1703 AVAIL_P is non-zero if X is an available expression. */
1706 insert_expr_in_table (x
, mode
, insn
, antic_p
, avail_p
)
1708 enum machine_mode mode
;
1710 int antic_p
, avail_p
;
1712 int found
, do_not_record_p
;
1714 struct expr
*cur_expr
, *last_expr
= NULL
;
1715 struct occr
*antic_occr
, *avail_occr
;
1716 struct occr
*last_occr
= NULL
;
1718 hash
= hash_expr (x
, mode
, &do_not_record_p
, expr_hash_table_size
);
1720 /* Do not insert expression in table if it contains volatile operands,
1721 or if hash_expr determines the expression is something we don't want
1722 to or can't handle. */
1723 if (do_not_record_p
)
1726 cur_expr
= expr_hash_table
[hash
];
1729 while (cur_expr
&& 0 == (found
= expr_equiv_p (cur_expr
->expr
, x
)))
1731 /* If the expression isn't found, save a pointer to the end of
1733 last_expr
= cur_expr
;
1734 cur_expr
= cur_expr
->next_same_hash
;
1739 cur_expr
= (struct expr
*) gcse_alloc (sizeof (struct expr
));
1740 bytes_used
+= sizeof (struct expr
);
1741 if (expr_hash_table
[hash
] == NULL
)
1742 /* This is the first pattern that hashed to this index. */
1743 expr_hash_table
[hash
] = cur_expr
;
1745 /* Add EXPR to end of this hash chain. */
1746 last_expr
->next_same_hash
= cur_expr
;
1748 /* Set the fields of the expr element. */
1750 cur_expr
->bitmap_index
= n_exprs
++;
1751 cur_expr
->next_same_hash
= NULL
;
1752 cur_expr
->antic_occr
= NULL
;
1753 cur_expr
->avail_occr
= NULL
;
1756 /* Now record the occurrence(s). */
1759 antic_occr
= cur_expr
->antic_occr
;
1761 /* Search for another occurrence in the same basic block. */
1762 while (antic_occr
&& BLOCK_NUM (antic_occr
->insn
) != BLOCK_NUM (insn
))
1764 /* If an occurrence isn't found, save a pointer to the end of
1766 last_occr
= antic_occr
;
1767 antic_occr
= antic_occr
->next
;
1771 /* Found another instance of the expression in the same basic block.
1772 Prefer the currently recorded one. We want the first one in the
1773 block and the block is scanned from start to end. */
1774 ; /* nothing to do */
1777 /* First occurrence of this expression in this basic block. */
1778 antic_occr
= (struct occr
*) gcse_alloc (sizeof (struct occr
));
1779 bytes_used
+= sizeof (struct occr
);
1780 /* First occurrence of this expression in any block? */
1781 if (cur_expr
->antic_occr
== NULL
)
1782 cur_expr
->antic_occr
= antic_occr
;
1784 last_occr
->next
= antic_occr
;
1786 antic_occr
->insn
= insn
;
1787 antic_occr
->next
= NULL
;
1793 avail_occr
= cur_expr
->avail_occr
;
1795 /* Search for another occurrence in the same basic block. */
1796 while (avail_occr
&& BLOCK_NUM (avail_occr
->insn
) != BLOCK_NUM (insn
))
1798 /* If an occurrence isn't found, save a pointer to the end of
1800 last_occr
= avail_occr
;
1801 avail_occr
= avail_occr
->next
;
1805 /* Found another instance of the expression in the same basic block.
1806 Prefer this occurrence to the currently recorded one. We want
1807 the last one in the block and the block is scanned from start
1809 avail_occr
->insn
= insn
;
1812 /* First occurrence of this expression in this basic block. */
1813 avail_occr
= (struct occr
*) gcse_alloc (sizeof (struct occr
));
1814 bytes_used
+= sizeof (struct occr
);
1816 /* First occurrence of this expression in any block? */
1817 if (cur_expr
->avail_occr
== NULL
)
1818 cur_expr
->avail_occr
= avail_occr
;
1820 last_occr
->next
= avail_occr
;
1822 avail_occr
->insn
= insn
;
1823 avail_occr
->next
= NULL
;
1828 /* Insert pattern X in INSN in the hash table.
1829 X is a SET of a reg to either another reg or a constant.
1830 If it is already present, record it as the last occurrence in INSN's
1834 insert_set_in_table (x
, insn
)
1840 struct expr
*cur_expr
, *last_expr
= NULL
;
1841 struct occr
*cur_occr
, *last_occr
= NULL
;
1843 if (GET_CODE (x
) != SET
1844 || GET_CODE (SET_DEST (x
)) != REG
)
1847 hash
= hash_set (REGNO (SET_DEST (x
)), set_hash_table_size
);
1849 cur_expr
= set_hash_table
[hash
];
1852 while (cur_expr
&& 0 == (found
= expr_equiv_p (cur_expr
->expr
, x
)))
1854 /* If the expression isn't found, save a pointer to the end of
1856 last_expr
= cur_expr
;
1857 cur_expr
= cur_expr
->next_same_hash
;
1862 cur_expr
= (struct expr
*) gcse_alloc (sizeof (struct expr
));
1863 bytes_used
+= sizeof (struct expr
);
1864 if (set_hash_table
[hash
] == NULL
)
1865 /* This is the first pattern that hashed to this index. */
1866 set_hash_table
[hash
] = cur_expr
;
1868 /* Add EXPR to end of this hash chain. */
1869 last_expr
->next_same_hash
= cur_expr
;
1871 /* Set the fields of the expr element.
1872 We must copy X because it can be modified when copy propagation is
1873 performed on its operands. */
1874 /* ??? Should this go in a different obstack? */
1875 cur_expr
->expr
= copy_rtx (x
);
1876 cur_expr
->bitmap_index
= n_sets
++;
1877 cur_expr
->next_same_hash
= NULL
;
1878 cur_expr
->antic_occr
= NULL
;
1879 cur_expr
->avail_occr
= NULL
;
1882 /* Now record the occurrence. */
1883 cur_occr
= cur_expr
->avail_occr
;
1885 /* Search for another occurrence in the same basic block. */
1886 while (cur_occr
&& BLOCK_NUM (cur_occr
->insn
) != BLOCK_NUM (insn
))
1888 /* If an occurrence isn't found, save a pointer to the end of
1890 last_occr
= cur_occr
;
1891 cur_occr
= cur_occr
->next
;
1895 /* Found another instance of the expression in the same basic block.
1896 Prefer this occurrence to the currently recorded one. We want the
1897 last one in the block and the block is scanned from start to end. */
1898 cur_occr
->insn
= insn
;
1901 /* First occurrence of this expression in this basic block. */
1902 cur_occr
= (struct occr
*) gcse_alloc (sizeof (struct occr
));
1903 bytes_used
+= sizeof (struct occr
);
1905 /* First occurrence of this expression in any block? */
1906 if (cur_expr
->avail_occr
== NULL
)
1907 cur_expr
->avail_occr
= cur_occr
;
1909 last_occr
->next
= cur_occr
;
1911 cur_occr
->insn
= insn
;
1912 cur_occr
->next
= NULL
;
1916 /* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
1917 non-zero, this is for the assignment hash table, otherwise it is for the
1918 expression hash table. */
1921 hash_scan_set (pat
, insn
, set_p
)
1925 rtx src
= SET_SRC (pat
);
1926 rtx dest
= SET_DEST (pat
);
1928 if (GET_CODE (src
) == CALL
)
1929 hash_scan_call (src
, insn
);
1931 if (GET_CODE (dest
) == REG
)
1933 int regno
= REGNO (dest
);
1936 /* Only record sets of pseudo-regs in the hash table. */
1938 && regno
>= FIRST_PSEUDO_REGISTER
1939 /* Don't GCSE something if we can't do a reg/reg copy. */
1940 && can_copy_p
[GET_MODE (dest
)]
1941 /* Is SET_SRC something we want to gcse? */
1942 && want_to_gcse_p (src
)
1943 /* Don't GCSE if it has attached REG_EQUIV note.
1944 At this point this only function parameters should have
1945 REG_EQUIV notes and if the argument slot is used somewhere
1946 explicitely, it means address of parameter has been taken,
1947 so we should not extend the lifetime of the pseudo. */
1948 && ((note
= find_reg_note (insn
, REG_EQUIV
, NULL_RTX
)) == 0
1949 || GET_CODE (XEXP (note
, 0)) != MEM
))
1951 /* An expression is not anticipatable if its operands are
1952 modified before this insn. */
1953 int antic_p
= oprs_anticipatable_p (src
, insn
);
1954 /* An expression is not available if its operands are
1955 subsequently modified, including this insn. */
1956 int avail_p
= oprs_available_p (src
, insn
);
1958 insert_expr_in_table (src
, GET_MODE (dest
), insn
, antic_p
, avail_p
);
1961 /* Record sets for constant/copy propagation. */
1963 && regno
>= FIRST_PSEUDO_REGISTER
1964 && ((GET_CODE (src
) == REG
1965 && REGNO (src
) >= FIRST_PSEUDO_REGISTER
1966 && can_copy_p
[GET_MODE (dest
)])
1967 || GET_CODE (src
) == CONST_INT
1968 || GET_CODE (src
) == SYMBOL_REF
1969 || GET_CODE (src
) == CONST_DOUBLE
)
1970 /* A copy is not available if its src or dest is subsequently
1971 modified. Here we want to search from INSN+1 on, but
1972 oprs_available_p searches from INSN on. */
1973 && (insn
== BLOCK_END (BLOCK_NUM (insn
))
1974 || ((tmp
= next_nonnote_insn (insn
)) != NULL_RTX
1975 && oprs_available_p (pat
, tmp
))))
1976 insert_set_in_table (pat
, insn
);
1981 hash_scan_clobber (x
, insn
)
1982 rtx x ATTRIBUTE_UNUSED
, insn ATTRIBUTE_UNUSED
;
1984 /* Currently nothing to do. */
1988 hash_scan_call (x
, insn
)
1989 rtx x ATTRIBUTE_UNUSED
, insn ATTRIBUTE_UNUSED
;
1991 /* Currently nothing to do. */
1994 /* Process INSN and add hash table entries as appropriate.
1996 Only available expressions that set a single pseudo-reg are recorded.
1998 Single sets in a PARALLEL could be handled, but it's an extra complication
1999 that isn't dealt with right now. The trick is handling the CLOBBERs that
2000 are also in the PARALLEL. Later.
2002 If SET_P is non-zero, this is for the assignment hash table,
2003 otherwise it is for the expression hash table.
2004 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2005 not record any expressions. */
2008 hash_scan_insn (insn
, set_p
, in_libcall_block
)
2011 int in_libcall_block
;
2013 rtx pat
= PATTERN (insn
);
2016 /* Pick out the sets of INSN and for other forms of instructions record
2017 what's been modified. */
2019 if (GET_CODE (pat
) == SET
&& ! in_libcall_block
)
2021 /* Ignore obvious no-ops. */
2022 if (SET_SRC (pat
) != SET_DEST (pat
))
2023 hash_scan_set (pat
, insn
, set_p
);
2025 else if (GET_CODE (pat
) == PARALLEL
)
2026 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
2028 rtx x
= XVECEXP (pat
, 0, i
);
2030 if (GET_CODE (x
) == SET
)
2032 if (GET_CODE (SET_SRC (x
)) == CALL
)
2033 hash_scan_call (SET_SRC (x
), insn
);
2035 else if (GET_CODE (x
) == CLOBBER
)
2036 hash_scan_clobber (x
, insn
);
2037 else if (GET_CODE (x
) == CALL
)
2038 hash_scan_call (x
, insn
);
2041 else if (GET_CODE (pat
) == CLOBBER
)
2042 hash_scan_clobber (pat
, insn
);
2043 else if (GET_CODE (pat
) == CALL
)
2044 hash_scan_call (pat
, insn
);
2048 dump_hash_table (file
, name
, table
, table_size
, total_size
)
2051 struct expr
**table
;
2052 int table_size
, total_size
;
2055 /* Flattened out table, so it's printed in proper order. */
2056 struct expr
**flat_table
;
2057 unsigned int *hash_val
;
2061 = (struct expr
**) xcalloc (total_size
, sizeof (struct expr
*));
2062 hash_val
= (unsigned int *) xmalloc (total_size
* sizeof (unsigned int));
2064 for (i
= 0; i
< table_size
; i
++)
2065 for (expr
= table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
2067 flat_table
[expr
->bitmap_index
] = expr
;
2068 hash_val
[expr
->bitmap_index
] = i
;
2071 fprintf (file
, "%s hash table (%d buckets, %d entries)\n",
2072 name
, table_size
, total_size
);
2074 for (i
= 0; i
< total_size
; i
++)
2075 if (flat_table
[i
] != 0)
2077 expr
= flat_table
[i
];
2078 fprintf (file
, "Index %d (hash value %d)\n ",
2079 expr
->bitmap_index
, hash_val
[i
]);
2080 print_rtl (file
, expr
->expr
);
2081 fprintf (file
, "\n");
2084 fprintf (file
, "\n");
2090 /* Record register first/last/block set information for REGNO in INSN.
2092 reg_first_set records the first place in the block where the register
2093 is set and is used to compute "anticipatability".
2095 reg_last_set records the last place in the block where the register
2096 is set and is used to compute "availability".
2098 reg_set_in_block records whether the register is set in the block
2099 and is used to compute "transparency". */
2102 record_last_reg_set_info (insn
, regno
)
2106 if (reg_first_set
[regno
] == NEVER_SET
)
2107 reg_first_set
[regno
] = INSN_CUID (insn
);
2109 reg_last_set
[regno
] = INSN_CUID (insn
);
2110 SET_BIT (reg_set_in_block
[BLOCK_NUM (insn
)], regno
);
2113 /* Record memory first/last/block set information for INSN. */
2116 record_last_mem_set_info (insn
)
2119 if (mem_first_set
== NEVER_SET
)
2120 mem_first_set
= INSN_CUID (insn
);
2122 mem_last_set
= INSN_CUID (insn
);
2123 mem_set_in_block
[BLOCK_NUM (insn
)] = 1;
2126 /* Called from compute_hash_table via note_stores to handle one
2127 SET or CLOBBER in an insn. DATA is really the instruction in which
2128 the SET is taking place. */
2131 record_last_set_info (dest
, setter
, data
)
2132 rtx dest
, setter ATTRIBUTE_UNUSED
;
2135 rtx last_set_insn
= (rtx
) data
;
2137 if (GET_CODE (dest
) == SUBREG
)
2138 dest
= SUBREG_REG (dest
);
2140 if (GET_CODE (dest
) == REG
)
2141 record_last_reg_set_info (last_set_insn
, REGNO (dest
));
2142 else if (GET_CODE (dest
) == MEM
2143 /* Ignore pushes, they clobber nothing. */
2144 && ! push_operand (dest
, GET_MODE (dest
)))
2145 record_last_mem_set_info (last_set_insn
);
2148 /* Top level function to create an expression or assignment hash table.
2150 Expression entries are placed in the hash table if
2151 - they are of the form (set (pseudo-reg) src),
2152 - src is something we want to perform GCSE on,
2153 - none of the operands are subsequently modified in the block
2155 Assignment entries are placed in the hash table if
2156 - they are of the form (set (pseudo-reg) src),
2157 - src is something we want to perform const/copy propagation on,
2158 - none of the operands or target are subsequently modified in the block
2160 Currently src must be a pseudo-reg or a const_int.
2162 F is the first insn.
2163 SET_P is non-zero for computing the assignment hash table. */
2166 compute_hash_table (set_p
)
2171 /* While we compute the hash table we also compute a bit array of which
2172 registers are set in which blocks.
2173 We also compute which blocks set memory, in the absence of aliasing
2174 support [which is TODO].
2175 ??? This isn't needed during const/copy propagation, but it's cheap to
2177 sbitmap_vector_zero (reg_set_in_block
, n_basic_blocks
);
2178 memset ((char *) mem_set_in_block
, 0, n_basic_blocks
);
2180 /* Some working arrays used to track first and last set in each block. */
2181 /* ??? One could use alloca here, but at some size a threshold is crossed
2182 beyond which one should use malloc. Are we at that threshold here? */
2183 reg_first_set
= (int *) gmalloc (max_gcse_regno
* sizeof (int));
2184 reg_last_set
= (int *) gmalloc (max_gcse_regno
* sizeof (int));
2186 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
2190 int in_libcall_block
;
2193 /* First pass over the instructions records information used to
2194 determine when registers and memory are first and last set.
2195 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2196 could be moved to compute_sets since they currently don't change. */
2198 for (i
= 0; i
< max_gcse_regno
; i
++)
2199 reg_first_set
[i
] = reg_last_set
[i
] = NEVER_SET
;
2201 mem_first_set
= NEVER_SET
;
2202 mem_last_set
= NEVER_SET
;
2204 for (insn
= BLOCK_HEAD (bb
);
2205 insn
&& insn
!= NEXT_INSN (BLOCK_END (bb
));
2206 insn
= NEXT_INSN (insn
))
2208 #ifdef NON_SAVING_SETJMP
2209 if (NON_SAVING_SETJMP
&& GET_CODE (insn
) == NOTE
2210 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_SETJMP
)
2212 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
2213 record_last_reg_set_info (insn
, regno
);
2218 if (! INSN_P (insn
))
2221 if (GET_CODE (insn
) == CALL_INSN
)
2223 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
2224 if ((call_used_regs
[regno
]
2225 && regno
!= STACK_POINTER_REGNUM
2226 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2227 && regno
!= HARD_FRAME_POINTER_REGNUM
2229 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2230 && ! (regno
== ARG_POINTER_REGNUM
&& fixed_regs
[regno
])
2232 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2233 && ! (regno
== PIC_OFFSET_TABLE_REGNUM
&& flag_pic
)
2236 && regno
!= FRAME_POINTER_REGNUM
)
2237 || global_regs
[regno
])
2238 record_last_reg_set_info (insn
, regno
);
2240 if (! CONST_CALL_P (insn
))
2241 record_last_mem_set_info (insn
);
2244 note_stores (PATTERN (insn
), record_last_set_info
, insn
);
2247 /* The next pass builds the hash table. */
2249 for (insn
= BLOCK_HEAD (bb
), in_libcall_block
= 0;
2250 insn
&& insn
!= NEXT_INSN (BLOCK_END (bb
));
2251 insn
= NEXT_INSN (insn
))
2254 if (find_reg_note (insn
, REG_LIBCALL
, NULL_RTX
))
2255 in_libcall_block
= 1;
2256 else if (find_reg_note (insn
, REG_RETVAL
, NULL_RTX
))
2257 in_libcall_block
= 0;
2258 hash_scan_insn (insn
, set_p
, in_libcall_block
);
2262 free (reg_first_set
);
2263 free (reg_last_set
);
2265 /* Catch bugs early. */
2266 reg_first_set
= reg_last_set
= 0;
2269 /* Allocate space for the set hash table.
2270 N_INSNS is the number of instructions in the function.
2271 It is used to determine the number of buckets to use. */
2274 alloc_set_hash_table (n_insns
)
2279 set_hash_table_size
= n_insns
/ 4;
2280 if (set_hash_table_size
< 11)
2281 set_hash_table_size
= 11;
2283 /* Attempt to maintain efficient use of hash table.
2284 Making it an odd number is simplest for now.
2285 ??? Later take some measurements. */
2286 set_hash_table_size
|= 1;
2287 n
= set_hash_table_size
* sizeof (struct expr
*);
2288 set_hash_table
= (struct expr
**) gmalloc (n
);
2291 /* Free things allocated by alloc_set_hash_table. */
2294 free_set_hash_table ()
2296 free (set_hash_table
);
2299 /* Compute the hash table for doing copy/const propagation. */
2302 compute_set_hash_table ()
2304 /* Initialize count of number of entries in hash table. */
2306 memset ((char *) set_hash_table
, 0,
2307 set_hash_table_size
* sizeof (struct expr
*));
2309 compute_hash_table (1);
2312 /* Allocate space for the expression hash table.
2313 N_INSNS is the number of instructions in the function.
2314 It is used to determine the number of buckets to use. */
2317 alloc_expr_hash_table (n_insns
)
2318 unsigned int n_insns
;
2322 expr_hash_table_size
= n_insns
/ 2;
2323 /* Make sure the amount is usable. */
2324 if (expr_hash_table_size
< 11)
2325 expr_hash_table_size
= 11;
2327 /* Attempt to maintain efficient use of hash table.
2328 Making it an odd number is simplest for now.
2329 ??? Later take some measurements. */
2330 expr_hash_table_size
|= 1;
2331 n
= expr_hash_table_size
* sizeof (struct expr
*);
2332 expr_hash_table
= (struct expr
**) gmalloc (n
);
2335 /* Free things allocated by alloc_expr_hash_table. */
2338 free_expr_hash_table ()
2340 free (expr_hash_table
);
2343 /* Compute the hash table for doing GCSE. */
2346 compute_expr_hash_table ()
2348 /* Initialize count of number of entries in hash table. */
2350 memset ((char *) expr_hash_table
, 0,
2351 expr_hash_table_size
* sizeof (struct expr
*));
2353 compute_hash_table (0);
2356 /* Expression tracking support. */
2358 /* Lookup pattern PAT in the expression table.
2359 The result is a pointer to the table entry, or NULL if not found. */
2361 static struct expr
*
2365 int do_not_record_p
;
2366 unsigned int hash
= hash_expr (pat
, GET_MODE (pat
), &do_not_record_p
,
2367 expr_hash_table_size
);
2370 if (do_not_record_p
)
2373 expr
= expr_hash_table
[hash
];
2375 while (expr
&& ! expr_equiv_p (expr
->expr
, pat
))
2376 expr
= expr
->next_same_hash
;
2381 /* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2382 matches it, otherwise return the first entry for REGNO. The result is a
2383 pointer to the table entry, or NULL if not found. */
2385 static struct expr
*
2386 lookup_set (regno
, pat
)
2390 unsigned int hash
= hash_set (regno
, set_hash_table_size
);
2393 expr
= set_hash_table
[hash
];
2397 while (expr
&& ! expr_equiv_p (expr
->expr
, pat
))
2398 expr
= expr
->next_same_hash
;
2402 while (expr
&& REGNO (SET_DEST (expr
->expr
)) != regno
)
2403 expr
= expr
->next_same_hash
;
2409 /* Return the next entry for REGNO in list EXPR. */
2411 static struct expr
*
2412 next_set (regno
, expr
)
2417 expr
= expr
->next_same_hash
;
2418 while (expr
&& REGNO (SET_DEST (expr
->expr
)) != regno
);
2423 /* Reset tables used to keep track of what's still available [since the
2424 start of the block]. */
2427 reset_opr_set_tables ()
2429 /* Maintain a bitmap of which regs have been set since beginning of
2431 sbitmap_zero (reg_set_bitmap
);
2433 /* Also keep a record of the last instruction to modify memory.
2434 For now this is very trivial, we only record whether any memory
2435 location has been modified. */
2439 /* Return non-zero if the operands of X are not set before INSN in
2440 INSN's basic block. */
2443 oprs_not_set_p (x
, insn
)
2453 code
= GET_CODE (x
);
2468 if (mem_last_set
!= 0)
2471 return oprs_not_set_p (XEXP (x
, 0), insn
);
2474 return ! TEST_BIT (reg_set_bitmap
, REGNO (x
));
2480 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
2484 /* If we are about to do the last recursive call
2485 needed at this level, change it into iteration.
2486 This function is called enough to be worth it. */
2488 return oprs_not_set_p (XEXP (x
, i
), insn
);
2490 if (! oprs_not_set_p (XEXP (x
, i
), insn
))
2493 else if (fmt
[i
] == 'E')
2494 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
2495 if (! oprs_not_set_p (XVECEXP (x
, i
, j
), insn
))
2502 /* Mark things set by a CALL. */
2508 mem_last_set
= INSN_CUID (insn
);
2511 /* Mark things set by a SET. */
2514 mark_set (pat
, insn
)
2517 rtx dest
= SET_DEST (pat
);
2519 while (GET_CODE (dest
) == SUBREG
2520 || GET_CODE (dest
) == ZERO_EXTRACT
2521 || GET_CODE (dest
) == SIGN_EXTRACT
2522 || GET_CODE (dest
) == STRICT_LOW_PART
)
2523 dest
= XEXP (dest
, 0);
2525 if (GET_CODE (dest
) == REG
)
2526 SET_BIT (reg_set_bitmap
, REGNO (dest
));
2527 else if (GET_CODE (dest
) == MEM
)
2528 mem_last_set
= INSN_CUID (insn
);
2530 if (GET_CODE (SET_SRC (pat
)) == CALL
)
2534 /* Record things set by a CLOBBER. */
2537 mark_clobber (pat
, insn
)
2540 rtx clob
= XEXP (pat
, 0);
2542 while (GET_CODE (clob
) == SUBREG
|| GET_CODE (clob
) == STRICT_LOW_PART
)
2543 clob
= XEXP (clob
, 0);
2545 if (GET_CODE (clob
) == REG
)
2546 SET_BIT (reg_set_bitmap
, REGNO (clob
));
2548 mem_last_set
= INSN_CUID (insn
);
2551 /* Record things set by INSN.
2552 This data is used by oprs_not_set_p. */
2555 mark_oprs_set (insn
)
2558 rtx pat
= PATTERN (insn
);
2561 if (GET_CODE (pat
) == SET
)
2562 mark_set (pat
, insn
);
2563 else if (GET_CODE (pat
) == PARALLEL
)
2564 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
2566 rtx x
= XVECEXP (pat
, 0, i
);
2568 if (GET_CODE (x
) == SET
)
2570 else if (GET_CODE (x
) == CLOBBER
)
2571 mark_clobber (x
, insn
);
2572 else if (GET_CODE (x
) == CALL
)
2576 else if (GET_CODE (pat
) == CLOBBER
)
2577 mark_clobber (pat
, insn
);
2578 else if (GET_CODE (pat
) == CALL
)
2583 /* Classic GCSE reaching definition support. */
2585 /* Allocate reaching def variables. */
2588 alloc_rd_mem (n_blocks
, n_insns
)
2589 int n_blocks
, n_insns
;
2591 rd_kill
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2592 sbitmap_vector_zero (rd_kill
, n_basic_blocks
);
2594 rd_gen
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2595 sbitmap_vector_zero (rd_gen
, n_basic_blocks
);
2597 reaching_defs
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2598 sbitmap_vector_zero (reaching_defs
, n_basic_blocks
);
2600 rd_out
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2601 sbitmap_vector_zero (rd_out
, n_basic_blocks
);
2604 /* Free reaching def variables. */
2611 free (reaching_defs
);
2615 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2618 handle_rd_kill_set (insn
, regno
, bb
)
2622 struct reg_set
*this_reg
;
2624 for (this_reg
= reg_set_table
[regno
]; this_reg
; this_reg
= this_reg
->next
)
2625 if (BLOCK_NUM (this_reg
->insn
) != BLOCK_NUM (insn
))
2626 SET_BIT (rd_kill
[bb
], INSN_CUID (this_reg
->insn
));
2629 /* Compute the set of kill's for reaching definitions. */
2638 For each set bit in `gen' of the block (i.e each insn which
2639 generates a definition in the block)
2640 Call the reg set by the insn corresponding to that bit regx
2641 Look at the linked list starting at reg_set_table[regx]
2642 For each setting of regx in the linked list, which is not in
2644 Set the bit in `kill' corresponding to that insn. */
2645 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
2646 for (cuid
= 0; cuid
< max_cuid
; cuid
++)
2647 if (TEST_BIT (rd_gen
[bb
], cuid
))
2649 rtx insn
= CUID_INSN (cuid
);
2650 rtx pat
= PATTERN (insn
);
2652 if (GET_CODE (insn
) == CALL_INSN
)
2654 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
2656 if ((call_used_regs
[regno
]
2657 && regno
!= STACK_POINTER_REGNUM
2658 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2659 && regno
!= HARD_FRAME_POINTER_REGNUM
2661 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2662 && ! (regno
== ARG_POINTER_REGNUM
2663 && fixed_regs
[regno
])
2665 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2666 && ! (regno
== PIC_OFFSET_TABLE_REGNUM
&& flag_pic
)
2668 && regno
!= FRAME_POINTER_REGNUM
)
2669 || global_regs
[regno
])
2670 handle_rd_kill_set (insn
, regno
, bb
);
2674 if (GET_CODE (pat
) == PARALLEL
)
2676 for (i
= XVECLEN (pat
, 0) - 1; i
>= 0; i
--)
2678 enum rtx_code code
= GET_CODE (XVECEXP (pat
, 0, i
));
2680 if ((code
== SET
|| code
== CLOBBER
)
2681 && GET_CODE (XEXP (XVECEXP (pat
, 0, i
), 0)) == REG
)
2682 handle_rd_kill_set (insn
,
2683 REGNO (XEXP (XVECEXP (pat
, 0, i
), 0)),
2687 else if (GET_CODE (pat
) == SET
&& GET_CODE (SET_DEST (pat
)) == REG
)
2688 /* Each setting of this register outside of this block
2689 must be marked in the set of kills in this block. */
2690 handle_rd_kill_set (insn
, REGNO (SET_DEST (pat
)), bb
);
2694 /* Compute the reaching definitions as in
2695 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2696 Chapter 10. It is the same algorithm as used for computing available
2697 expressions but applied to the gens and kills of reaching definitions. */
2702 int bb
, changed
, passes
;
2704 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
2705 sbitmap_copy (rd_out
[bb
] /*dst*/, rd_gen
[bb
] /*src*/);
2712 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
2714 sbitmap_union_of_preds (reaching_defs
[bb
], rd_out
, bb
);
2715 changed
|= sbitmap_union_of_diff (rd_out
[bb
], rd_gen
[bb
],
2716 reaching_defs
[bb
], rd_kill
[bb
]);
2722 fprintf (gcse_file
, "reaching def computation: %d passes\n", passes
);
2725 /* Classic GCSE available expression support. */
2727 /* Allocate memory for available expression computation. */
2730 alloc_avail_expr_mem (n_blocks
, n_exprs
)
2731 int n_blocks
, n_exprs
;
2733 ae_kill
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
2734 sbitmap_vector_zero (ae_kill
, n_basic_blocks
);
2736 ae_gen
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
2737 sbitmap_vector_zero (ae_gen
, n_basic_blocks
);
2739 ae_in
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
2740 sbitmap_vector_zero (ae_in
, n_basic_blocks
);
2742 ae_out
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
2743 sbitmap_vector_zero (ae_out
, n_basic_blocks
);
2747 free_avail_expr_mem ()
2755 /* Compute the set of available expressions generated in each basic block. */
2764 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2765 This is all we have to do because an expression is not recorded if it
2766 is not available, and the only expressions we want to work with are the
2767 ones that are recorded. */
2768 for (i
= 0; i
< expr_hash_table_size
; i
++)
2769 for (expr
= expr_hash_table
[i
]; expr
!= 0; expr
= expr
->next_same_hash
)
2770 for (occr
= expr
->avail_occr
; occr
!= 0; occr
= occr
->next
)
2771 SET_BIT (ae_gen
[BLOCK_NUM (occr
->insn
)], expr
->bitmap_index
);
2774 /* Return non-zero if expression X is killed in BB. */
2777 expr_killed_p (x
, bb
)
2788 code
= GET_CODE (x
);
2792 return TEST_BIT (reg_set_in_block
[bb
], REGNO (x
));
2795 if (mem_set_in_block
[bb
])
2798 return expr_killed_p (XEXP (x
, 0), bb
);
2815 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
2819 /* If we are about to do the last recursive call
2820 needed at this level, change it into iteration.
2821 This function is called enough to be worth it. */
2823 return expr_killed_p (XEXP (x
, i
), bb
);
2824 else if (expr_killed_p (XEXP (x
, i
), bb
))
2827 else if (fmt
[i
] == 'E')
2828 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
2829 if (expr_killed_p (XVECEXP (x
, i
, j
), bb
))
2836 /* Compute the set of available expressions killed in each basic block. */
2839 compute_ae_kill (ae_gen
, ae_kill
)
2840 sbitmap
*ae_gen
, *ae_kill
;
2846 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
2847 for (i
= 0; i
< expr_hash_table_size
; i
++)
2848 for (expr
= expr_hash_table
[i
]; expr
; expr
= expr
->next_same_hash
)
2850 /* Skip EXPR if generated in this block. */
2851 if (TEST_BIT (ae_gen
[bb
], expr
->bitmap_index
))
2854 if (expr_killed_p (expr
->expr
, bb
))
2855 SET_BIT (ae_kill
[bb
], expr
->bitmap_index
);
2859 /* Actually perform the Classic GCSE optimizations. */
2861 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2863 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2864 as a positive reach. We want to do this when there are two computations
2865 of the expression in the block.
2867 VISITED is a pointer to a working buffer for tracking which BB's have
2868 been visited. It is NULL for the top-level call.
2870 We treat reaching expressions that go through blocks containing the same
2871 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2872 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2873 2 as not reaching. The intent is to improve the probability of finding
2874 only one reaching expression and to reduce register lifetimes by picking
2875 the closest such expression. */
2878 expr_reaches_here_p_work (occr
, expr
, bb
, check_self_loop
, visited
)
2882 int check_self_loop
;
2887 for (pred
= BASIC_BLOCK(bb
)->pred
; pred
!= NULL
; pred
= pred
->pred_next
)
2889 int pred_bb
= pred
->src
->index
;
2891 if (visited
[pred_bb
])
2892 /* This predecessor has already been visited. Nothing to do. */
2894 else if (pred_bb
== bb
)
2896 /* BB loops on itself. */
2898 && TEST_BIT (ae_gen
[pred_bb
], expr
->bitmap_index
)
2899 && BLOCK_NUM (occr
->insn
) == pred_bb
)
2902 visited
[pred_bb
] = 1;
2905 /* Ignore this predecessor if it kills the expression. */
2906 else if (TEST_BIT (ae_kill
[pred_bb
], expr
->bitmap_index
))
2907 visited
[pred_bb
] = 1;
2909 /* Does this predecessor generate this expression? */
2910 else if (TEST_BIT (ae_gen
[pred_bb
], expr
->bitmap_index
))
2912 /* Is this the occurrence we're looking for?
2913 Note that there's only one generating occurrence per block
2914 so we just need to check the block number. */
2915 if (BLOCK_NUM (occr
->insn
) == pred_bb
)
2918 visited
[pred_bb
] = 1;
2921 /* Neither gen nor kill. */
2924 visited
[pred_bb
] = 1;
2925 if (expr_reaches_here_p_work (occr
, expr
, pred_bb
, check_self_loop
,
2932 /* All paths have been checked. */
2936 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2937 memory allocated for that function is returned. */
2940 expr_reaches_here_p (occr
, expr
, bb
, check_self_loop
)
2944 int check_self_loop
;
2947 char *visited
= (char *) xcalloc (n_basic_blocks
, 1);
2949 rval
= expr_reaches_here_p_work (occr
, expr
, bb
, check_self_loop
, visited
);
2955 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2956 If there is more than one such instruction, return NULL.
2958 Called only by handle_avail_expr. */
2961 computing_insn (expr
, insn
)
2965 int bb
= BLOCK_NUM (insn
);
2967 if (expr
->avail_occr
->next
== NULL
)
2969 if (BLOCK_NUM (expr
->avail_occr
->insn
) == bb
)
2970 /* The available expression is actually itself
2971 (i.e. a loop in the flow graph) so do nothing. */
2974 /* (FIXME) Case that we found a pattern that was created by
2975 a substitution that took place. */
2976 return expr
->avail_occr
->insn
;
2980 /* Pattern is computed more than once.
2981 Search backwards from this insn to see how many of these
2982 computations actually reach this insn. */
2984 rtx insn_computes_expr
= NULL
;
2987 for (occr
= expr
->avail_occr
; occr
!= NULL
; occr
= occr
->next
)
2989 if (BLOCK_NUM (occr
->insn
) == bb
)
2991 /* The expression is generated in this block.
2992 The only time we care about this is when the expression
2993 is generated later in the block [and thus there's a loop].
2994 We let the normal cse pass handle the other cases. */
2995 if (INSN_CUID (insn
) < INSN_CUID (occr
->insn
)
2996 && expr_reaches_here_p (occr
, expr
, bb
, 1))
3002 insn_computes_expr
= occr
->insn
;
3005 else if (expr_reaches_here_p (occr
, expr
, bb
, 0))
3011 insn_computes_expr
= occr
->insn
;
3015 if (insn_computes_expr
== NULL
)
3018 return insn_computes_expr
;
3022 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3023 Only called by can_disregard_other_sets. */
3026 def_reaches_here_p (insn
, def_insn
)
3031 if (TEST_BIT (reaching_defs
[BLOCK_NUM (insn
)], INSN_CUID (def_insn
)))
3034 if (BLOCK_NUM (insn
) == BLOCK_NUM (def_insn
))
3036 if (INSN_CUID (def_insn
) < INSN_CUID (insn
))
3038 if (GET_CODE (PATTERN (def_insn
)) == PARALLEL
)
3040 else if (GET_CODE (PATTERN (def_insn
)) == CLOBBER
)
3041 reg
= XEXP (PATTERN (def_insn
), 0);
3042 else if (GET_CODE (PATTERN (def_insn
)) == SET
)
3043 reg
= SET_DEST (PATTERN (def_insn
));
3047 return ! reg_set_between_p (reg
, NEXT_INSN (def_insn
), insn
);
3056 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The
3057 value returned is the number of definitions that reach INSN. Returning a
3058 value of zero means that [maybe] more than one definition reaches INSN and
3059 the caller can't perform whatever optimization it is trying. i.e. it is
3060 always safe to return zero. */
3063 can_disregard_other_sets (addr_this_reg
, insn
, for_combine
)
3064 struct reg_set
**addr_this_reg
;
3068 int number_of_reaching_defs
= 0;
3069 struct reg_set
*this_reg
;
3071 for (this_reg
= *addr_this_reg
; this_reg
!= 0; this_reg
= this_reg
->next
)
3072 if (def_reaches_here_p (insn
, this_reg
->insn
))
3074 number_of_reaching_defs
++;
3075 /* Ignore parallels for now. */
3076 if (GET_CODE (PATTERN (this_reg
->insn
)) == PARALLEL
)
3080 && (GET_CODE (PATTERN (this_reg
->insn
)) == CLOBBER
3081 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg
->insn
)),
3082 SET_SRC (PATTERN (insn
)))))
3083 /* A setting of the reg to a different value reaches INSN. */
3086 if (number_of_reaching_defs
> 1)
3088 /* If in this setting the value the register is being set to is
3089 equal to the previous value the register was set to and this
3090 setting reaches the insn we are trying to do the substitution
3091 on then we are ok. */
3092 if (GET_CODE (PATTERN (this_reg
->insn
)) == CLOBBER
)
3094 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg
->insn
)),
3095 SET_SRC (PATTERN (insn
))))
3099 *addr_this_reg
= this_reg
;
3102 return number_of_reaching_defs
;
3105 /* Expression computed by insn is available and the substitution is legal,
3106 so try to perform the substitution.
3108 The result is non-zero if any changes were made. */
3111 handle_avail_expr (insn
, expr
)
3115 rtx pat
, insn_computes_expr
;
3117 struct reg_set
*this_reg
;
3118 int found_setting
, use_src
;
3121 /* We only handle the case where one computation of the expression
3122 reaches this instruction. */
3123 insn_computes_expr
= computing_insn (expr
, insn
);
3124 if (insn_computes_expr
== NULL
)
3130 /* At this point we know only one computation of EXPR outside of this
3131 block reaches this insn. Now try to find a register that the
3132 expression is computed into. */
3133 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr
))) == REG
)
3135 /* This is the case when the available expression that reaches
3136 here has already been handled as an available expression. */
3137 unsigned int regnum_for_replacing
3138 = REGNO (SET_SRC (PATTERN (insn_computes_expr
)));
3140 /* If the register was created by GCSE we can't use `reg_set_table',
3141 however we know it's set only once. */
3142 if (regnum_for_replacing
>= max_gcse_regno
3143 /* If the register the expression is computed into is set only once,
3144 or only one set reaches this insn, we can use it. */
3145 || (((this_reg
= reg_set_table
[regnum_for_replacing
]),
3146 this_reg
->next
== NULL
)
3147 || can_disregard_other_sets (&this_reg
, insn
, 0)))
3156 unsigned int regnum_for_replacing
3157 = REGNO (SET_DEST (PATTERN (insn_computes_expr
)));
3159 /* This shouldn't happen. */
3160 if (regnum_for_replacing
>= max_gcse_regno
)
3163 this_reg
= reg_set_table
[regnum_for_replacing
];
3165 /* If the register the expression is computed into is set only once,
3166 or only one set reaches this insn, use it. */
3167 if (this_reg
->next
== NULL
3168 || can_disregard_other_sets (&this_reg
, insn
, 0))
3174 pat
= PATTERN (insn
);
3176 to
= SET_SRC (PATTERN (insn_computes_expr
));
3178 to
= SET_DEST (PATTERN (insn_computes_expr
));
3179 changed
= validate_change (insn
, &SET_SRC (pat
), to
, 0);
3181 /* We should be able to ignore the return code from validate_change but
3182 to play it safe we check. */
3186 if (gcse_file
!= NULL
)
3188 fprintf (gcse_file
, "GCSE: Replacing the source in insn %d with",
3190 fprintf (gcse_file
, " reg %d %s insn %d\n",
3191 REGNO (to
), use_src
? "from" : "set in",
3192 INSN_UID (insn_computes_expr
));
3197 /* The register that the expr is computed into is set more than once. */
3198 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3200 /* Insert an insn after insnx that copies the reg set in insnx
3201 into a new pseudo register call this new register REGN.
3202 From insnb until end of basic block or until REGB is set
3203 replace all uses of REGB with REGN. */
3206 to
= gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr
))));
3208 /* Generate the new insn. */
3209 /* ??? If the change fails, we return 0, even though we created
3210 an insn. I think this is ok. */
3212 = emit_insn_after (gen_rtx_SET (VOIDmode
, to
,
3214 (insn_computes_expr
))),
3215 insn_computes_expr
);
3217 /* Keep block number table up to date. */
3218 set_block_num (new_insn
, BLOCK_NUM (insn_computes_expr
));
3220 /* Keep register set table up to date. */
3221 record_one_set (REGNO (to
), new_insn
);
3223 gcse_create_count
++;
3224 if (gcse_file
!= NULL
)
3226 fprintf (gcse_file
, "GCSE: Creating insn %d to copy value of reg %d",
3227 INSN_UID (NEXT_INSN (insn_computes_expr
)),
3228 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr
)))));
3229 fprintf (gcse_file
, ", computed in insn %d,\n",
3230 INSN_UID (insn_computes_expr
));
3231 fprintf (gcse_file
, " into newly allocated reg %d\n",
3235 pat
= PATTERN (insn
);
3237 /* Do register replacement for INSN. */
3238 changed
= validate_change (insn
, &SET_SRC (pat
),
3240 (NEXT_INSN (insn_computes_expr
))),
3243 /* We should be able to ignore the return code from validate_change but
3244 to play it safe we check. */
3248 if (gcse_file
!= NULL
)
3251 "GCSE: Replacing the source in insn %d with reg %d ",
3253 REGNO (SET_DEST (PATTERN (NEXT_INSN
3254 (insn_computes_expr
)))));
3255 fprintf (gcse_file
, "set in insn %d\n",
3256 INSN_UID (insn_computes_expr
));
3264 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3265 the dataflow analysis has been done.
3267 The result is non-zero if a change was made. */
3275 /* Note we start at block 1. */
3278 for (bb
= 1; bb
< n_basic_blocks
; bb
++)
3280 /* Reset tables used to keep track of what's still valid [since the
3281 start of the block]. */
3282 reset_opr_set_tables ();
3284 for (insn
= BLOCK_HEAD (bb
);
3285 insn
!= NULL
&& insn
!= NEXT_INSN (BLOCK_END (bb
));
3286 insn
= NEXT_INSN (insn
))
3288 /* Is insn of form (set (pseudo-reg) ...)? */
3289 if (GET_CODE (insn
) == INSN
3290 && GET_CODE (PATTERN (insn
)) == SET
3291 && GET_CODE (SET_DEST (PATTERN (insn
))) == REG
3292 && REGNO (SET_DEST (PATTERN (insn
))) >= FIRST_PSEUDO_REGISTER
)
3294 rtx pat
= PATTERN (insn
);
3295 rtx src
= SET_SRC (pat
);
3298 if (want_to_gcse_p (src
)
3299 /* Is the expression recorded? */
3300 && ((expr
= lookup_expr (src
)) != NULL
)
3301 /* Is the expression available [at the start of the
3303 && TEST_BIT (ae_in
[bb
], expr
->bitmap_index
)
3304 /* Are the operands unchanged since the start of the
3306 && oprs_not_set_p (src
, insn
))
3307 changed
|= handle_avail_expr (insn
, expr
);
3310 /* Keep track of everything modified by this insn. */
3311 /* ??? Need to be careful w.r.t. mods done to INSN. */
3313 mark_oprs_set (insn
);
3320 /* Top level routine to perform one classic GCSE pass.
3322 Return non-zero if a change was made. */
3325 one_classic_gcse_pass (pass
)
3330 gcse_subst_count
= 0;
3331 gcse_create_count
= 0;
3333 alloc_expr_hash_table (max_cuid
);
3334 alloc_rd_mem (n_basic_blocks
, max_cuid
);
3335 compute_expr_hash_table ();
3337 dump_hash_table (gcse_file
, "Expression", expr_hash_table
,
3338 expr_hash_table_size
, n_exprs
);
3344 alloc_avail_expr_mem (n_basic_blocks
, n_exprs
);
3346 compute_ae_kill (ae_gen
, ae_kill
);
3347 compute_available (ae_gen
, ae_kill
, ae_out
, ae_in
);
3348 changed
= classic_gcse ();
3349 free_avail_expr_mem ();
3353 free_expr_hash_table ();
3357 fprintf (gcse_file
, "\n");
3358 fprintf (gcse_file
, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3359 current_function_name
, pass
, bytes_used
, gcse_subst_count
);
3360 fprintf (gcse_file
, "%d insns created\n", gcse_create_count
);
3366 /* Compute copy/constant propagation working variables. */
3368 /* Local properties of assignments. */
3369 static sbitmap
*cprop_pavloc
;
3370 static sbitmap
*cprop_absaltered
;
3372 /* Global properties of assignments (computed from the local properties). */
3373 static sbitmap
*cprop_avin
;
3374 static sbitmap
*cprop_avout
;
3376 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3377 basic blocks. N_SETS is the number of sets. */
3380 alloc_cprop_mem (n_blocks
, n_sets
)
3381 int n_blocks
, n_sets
;
3383 cprop_pavloc
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3384 cprop_absaltered
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3386 cprop_avin
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3387 cprop_avout
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3390 /* Free vars used by copy/const propagation. */
3395 free (cprop_pavloc
);
3396 free (cprop_absaltered
);
3401 /* For each block, compute whether X is transparent. X is either an
3402 expression or an assignment [though we don't care which, for this context
3403 an assignment is treated as an expression]. For each block where an
3404 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3408 compute_transp (x
, indx
, bmap
, set_p
)
3419 /* repeat is used to turn tail-recursion into iteration since GCC
3420 can't do it when there's no return value. */
3426 code
= GET_CODE (x
);
3432 if (REGNO (x
) < FIRST_PSEUDO_REGISTER
)
3434 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
3435 if (TEST_BIT (reg_set_in_block
[bb
], REGNO (x
)))
3436 SET_BIT (bmap
[bb
], indx
);
3440 for (r
= reg_set_table
[REGNO (x
)]; r
!= NULL
; r
= r
->next
)
3441 SET_BIT (bmap
[BLOCK_NUM (r
->insn
)], indx
);
3446 if (REGNO (x
) < FIRST_PSEUDO_REGISTER
)
3448 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
3449 if (TEST_BIT (reg_set_in_block
[bb
], REGNO (x
)))
3450 RESET_BIT (bmap
[bb
], indx
);
3454 for (r
= reg_set_table
[REGNO (x
)]; r
!= NULL
; r
= r
->next
)
3455 RESET_BIT (bmap
[BLOCK_NUM (r
->insn
)], indx
);
3464 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
3465 if (mem_set_in_block
[bb
])
3466 SET_BIT (bmap
[bb
], indx
);
3470 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
3471 if (mem_set_in_block
[bb
])
3472 RESET_BIT (bmap
[bb
], indx
);
3493 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
3497 /* If we are about to do the last recursive call
3498 needed at this level, change it into iteration.
3499 This function is called enough to be worth it. */
3506 compute_transp (XEXP (x
, i
), indx
, bmap
, set_p
);
3508 else if (fmt
[i
] == 'E')
3509 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
3510 compute_transp (XVECEXP (x
, i
, j
), indx
, bmap
, set_p
);
3514 /* Top level routine to do the dataflow analysis needed by copy/const
3518 compute_cprop_data ()
3520 compute_local_properties (cprop_absaltered
, cprop_pavloc
, NULL
, 1);
3521 compute_available (cprop_pavloc
, cprop_absaltered
,
3522 cprop_avout
, cprop_avin
);
3525 /* Copy/constant propagation. */
3527 /* Maximum number of register uses in an insn that we handle. */
3530 /* Table of uses found in an insn.
3531 Allocated statically to avoid alloc/free complexity and overhead. */
3532 static struct reg_use reg_use_table
[MAX_USES
];
3534 /* Index into `reg_use_table' while building it. */
3535 static int reg_use_count
;
3537 /* Set up a list of register numbers used in INSN. The found uses are stored
3538 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3539 and contains the number of uses in the table upon exit.
3541 ??? If a register appears multiple times we will record it multiple times.
3542 This doesn't hurt anything but it will slow things down. */
3552 /* repeat is used to turn tail-recursion into iteration since GCC
3553 can't do it when there's no return value. */
3559 code
= GET_CODE (x
);
3563 if (reg_use_count
== MAX_USES
)
3566 reg_use_table
[reg_use_count
].reg_rtx
= x
;
3584 case ASM_INPUT
: /*FIXME*/
3588 if (GET_CODE (SET_DEST (x
)) == MEM
)
3589 find_used_regs (SET_DEST (x
));
3597 /* Recursively scan the operands of this expression. */
3599 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
3603 /* If we are about to do the last recursive call
3604 needed at this level, change it into iteration.
3605 This function is called enough to be worth it. */
3612 find_used_regs (XEXP (x
, i
));
3614 else if (fmt
[i
] == 'E')
3615 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
3616 find_used_regs (XVECEXP (x
, i
, j
));
3620 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3621 Returns non-zero is successful. */
3624 try_replace_reg (from
, to
, insn
)
3632 note
= find_reg_note (insn
, REG_EQUAL
, NULL_RTX
);
3635 note
= find_reg_note (insn
, REG_EQUIV
, NULL_RTX
);
3637 /* If this fails we could try to simplify the result of the
3638 replacement and attempt to recognize the simplified insn.
3640 But we need a general simplify_rtx that doesn't have pass
3641 specific state variables. I'm not aware of one at the moment. */
3643 success
= validate_replace_src (from
, to
, insn
);
3644 set
= single_set (insn
);
3646 /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3648 if (!success
&& !note
)
3653 note
= REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_EQUAL
,
3654 copy_rtx (SET_SRC (set
)),
3658 /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes. Also
3659 try to simplify them. */
3664 if (!validate_replace_rtx_subexp (from
, to
, insn
, &XEXP (note
, 0)))
3667 src
= XEXP (note
, 0);
3669 /* Try to simplify resulting note. */
3670 simplified
= simplify_rtx (src
);
3674 XEXP (note
, 0) = src
;
3677 /* REG_EQUAL may get simplified into register.
3678 We don't allow that. Remove that note. This code ought
3679 not to hapen, because previous code ought to syntetize
3680 reg-reg move, but be on the safe side. */
3681 else if (REG_P (src
))
3682 remove_note (insn
, note
);
3687 /* Find a set of REGNOs that are available on entry to INSN's block. Returns
3688 NULL no such set is found. */
3690 static struct expr
*
3691 find_avail_set (regno
, insn
)
3695 /* SET1 contains the last set found that can be returned to the caller for
3696 use in a substitution. */
3697 struct expr
*set1
= 0;
3699 /* Loops are not possible here. To get a loop we would need two sets
3700 available at the start of the block containing INSN. ie we would
3701 need two sets like this available at the start of the block:
3703 (set (reg X) (reg Y))
3704 (set (reg Y) (reg X))
3706 This can not happen since the set of (reg Y) would have killed the
3707 set of (reg X) making it unavailable at the start of this block. */
3711 struct expr
*set
= lookup_set (regno
, NULL_RTX
);
3713 /* Find a set that is available at the start of the block
3714 which contains INSN. */
3717 if (TEST_BIT (cprop_avin
[BLOCK_NUM (insn
)], set
->bitmap_index
))
3719 set
= next_set (regno
, set
);
3722 /* If no available set was found we've reached the end of the
3723 (possibly empty) copy chain. */
3727 if (GET_CODE (set
->expr
) != SET
)
3730 src
= SET_SRC (set
->expr
);
3732 /* We know the set is available.
3733 Now check that SRC is ANTLOC (i.e. none of the source operands
3734 have changed since the start of the block).
3736 If the source operand changed, we may still use it for the next
3737 iteration of this loop, but we may not use it for substitutions. */
3739 if (CONSTANT_P (src
) || oprs_not_set_p (src
, insn
))
3742 /* If the source of the set is anything except a register, then
3743 we have reached the end of the copy chain. */
3744 if (GET_CODE (src
) != REG
)
3747 /* Follow the copy chain, ie start another iteration of the loop
3748 and see if we have an available copy into SRC. */
3749 regno
= REGNO (src
);
3752 /* SET1 holds the last set that was available and anticipatable at
3757 /* Subroutine of cprop_insn that tries to propagate constants into
3758 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3759 that we can use for substitutions.
3760 REG_USED is the use we will try to replace, SRC is the constant we
3761 will try to substitute for it.
3762 Returns nonzero if a change was made. */
3765 cprop_jump (insn
, copy
, reg_used
, src
)
3767 struct reg_use
*reg_used
;
3770 rtx set
= PATTERN (copy
);
3773 /* Replace the register with the appropriate constant. */
3774 replace_rtx (SET_SRC (set
), reg_used
->reg_rtx
, src
);
3776 temp
= simplify_ternary_operation (GET_CODE (SET_SRC (set
)),
3777 GET_MODE (SET_SRC (set
)),
3778 GET_MODE (XEXP (SET_SRC (set
), 0)),
3779 XEXP (SET_SRC (set
), 0),
3780 XEXP (SET_SRC (set
), 1),
3781 XEXP (SET_SRC (set
), 2));
3783 /* If no simplification can be made, then try the next
3788 SET_SRC (set
) = temp
;
3790 /* That may have changed the structure of TEMP, so
3791 force it to be rerecognized if it has not turned
3792 into a nop or unconditional jump. */
3794 INSN_CODE (copy
) = -1;
3795 if ((SET_DEST (set
) == pc_rtx
3796 && (SET_SRC (set
) == pc_rtx
3797 || GET_CODE (SET_SRC (set
)) == LABEL_REF
))
3798 || recog (PATTERN (copy
), copy
, NULL
) >= 0)
3800 /* This has either become an unconditional jump
3801 or a nop-jump. We'd like to delete nop jumps
3802 here, but doing so confuses gcse. So we just
3803 make the replacement and let later passes
3805 PATTERN (insn
) = set
;
3806 INSN_CODE (insn
) = -1;
3808 /* One less use of the label this insn used to jump to
3809 if we turned this into a NOP jump. */
3810 if (SET_SRC (set
) == pc_rtx
&& JUMP_LABEL (insn
) != 0)
3811 --LABEL_NUSES (JUMP_LABEL (insn
));
3813 /* If this has turned into an unconditional jump,
3814 then put a barrier after it so that the unreachable
3815 code will be deleted. */
3816 if (GET_CODE (SET_SRC (set
)) == LABEL_REF
)
3817 emit_barrier_after (insn
);
3819 run_jump_opt_after_gcse
= 1;
3822 if (gcse_file
!= NULL
)
3825 "CONST-PROP: Replacing reg %d in insn %d with constant ",
3826 REGNO (reg_used
->reg_rtx
), INSN_UID (insn
));
3827 print_rtl (gcse_file
, src
);
3828 fprintf (gcse_file
, "\n");
3838 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3839 for machines that have CC0. INSN is a single set that stores into CC0;
3840 the insn following it is a conditional jump. REG_USED is the use we will
3841 try to replace, SRC is the constant we will try to substitute for it.
3842 Returns nonzero if a change was made. */
3845 cprop_cc0_jump (insn
, reg_used
, src
)
3847 struct reg_use
*reg_used
;
3850 rtx jump
= NEXT_INSN (insn
);
3851 rtx copy
= copy_rtx (jump
);
3852 rtx set
= PATTERN (copy
);
3854 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3855 substitute into it. */
3856 replace_rtx (SET_SRC (set
), cc0_rtx
, copy_rtx (SET_SRC (PATTERN (insn
))));
3857 if (! cprop_jump (jump
, copy
, reg_used
, src
))
3860 /* If we succeeded, delete the cc0 setter. */
3861 PUT_CODE (insn
, NOTE
);
3862 NOTE_LINE_NUMBER (insn
) = NOTE_INSN_DELETED
;
3863 NOTE_SOURCE_FILE (insn
) = 0;
3868 /* Perform constant and copy propagation on INSN.
3869 The result is non-zero if a change was made. */
3872 cprop_insn (insn
, alter_jumps
)
3876 struct reg_use
*reg_used
;
3880 /* Only propagate into SETs. Note that a conditional jump is a
3881 SET with pc_rtx as the destination. */
3882 if ((GET_CODE (insn
) != INSN
3883 && GET_CODE (insn
) != JUMP_INSN
)
3884 || GET_CODE (PATTERN (insn
)) != SET
)
3888 find_used_regs (PATTERN (insn
));
3890 note
= find_reg_note (insn
, REG_EQUIV
, NULL_RTX
);
3892 note
= find_reg_note (insn
, REG_EQUAL
, NULL_RTX
);
3894 /* We may win even when propagating constants into notes. */
3896 find_used_regs (XEXP (note
, 0));
3898 for (reg_used
= ®_use_table
[0]; reg_use_count
> 0;
3899 reg_used
++, reg_use_count
--)
3901 unsigned int regno
= REGNO (reg_used
->reg_rtx
);
3905 /* Ignore registers created by GCSE.
3906 We do this because ... */
3907 if (regno
>= max_gcse_regno
)
3910 /* If the register has already been set in this block, there's
3911 nothing we can do. */
3912 if (! oprs_not_set_p (reg_used
->reg_rtx
, insn
))
3915 /* Find an assignment that sets reg_used and is available
3916 at the start of the block. */
3917 set
= find_avail_set (regno
, insn
);
3922 /* ??? We might be able to handle PARALLELs. Later. */
3923 if (GET_CODE (pat
) != SET
)
3926 src
= SET_SRC (pat
);
3928 /* Constant propagation. */
3929 if (GET_CODE (src
) == CONST_INT
|| GET_CODE (src
) == CONST_DOUBLE
3930 || GET_CODE (src
) == SYMBOL_REF
)
3932 /* Handle normal insns first. */
3933 if (GET_CODE (insn
) == INSN
3934 && try_replace_reg (reg_used
->reg_rtx
, src
, insn
))
3938 if (gcse_file
!= NULL
)
3940 fprintf (gcse_file
, "CONST-PROP: Replacing reg %d in ",
3942 fprintf (gcse_file
, "insn %d with constant ",
3944 print_rtl (gcse_file
, src
);
3945 fprintf (gcse_file
, "\n");
3948 /* The original insn setting reg_used may or may not now be
3949 deletable. We leave the deletion to flow. */
3952 /* Try to propagate a CONST_INT into a conditional jump.
3953 We're pretty specific about what we will handle in this
3954 code, we can extend this as necessary over time.
3956 Right now the insn in question must look like
3957 (set (pc) (if_then_else ...)) */
3958 else if (alter_jumps
3959 && GET_CODE (insn
) == JUMP_INSN
3960 && condjump_p (insn
)
3961 && ! simplejump_p (insn
))
3962 changed
|= cprop_jump (insn
, copy_rtx (insn
), reg_used
, src
);
3964 /* Similar code for machines that use a pair of CC0 setter and
3965 conditional jump insn. */
3966 else if (alter_jumps
3967 && GET_CODE (PATTERN (insn
)) == SET
3968 && SET_DEST (PATTERN (insn
)) == cc0_rtx
3969 && GET_CODE (NEXT_INSN (insn
)) == JUMP_INSN
3970 && condjump_p (NEXT_INSN (insn
))
3971 && ! simplejump_p (NEXT_INSN (insn
)))
3973 if (cprop_cc0_jump (insn
, reg_used
, src
))
3981 else if (GET_CODE (src
) == REG
3982 && REGNO (src
) >= FIRST_PSEUDO_REGISTER
3983 && REGNO (src
) != regno
)
3985 if (try_replace_reg (reg_used
->reg_rtx
, src
, insn
))
3989 if (gcse_file
!= NULL
)
3991 fprintf (gcse_file
, "COPY-PROP: Replacing reg %d in insn %d",
3992 regno
, INSN_UID (insn
));
3993 fprintf (gcse_file
, " with reg %d\n", REGNO (src
));
3996 /* The original insn setting reg_used may or may not now be
3997 deletable. We leave the deletion to flow. */
3998 /* FIXME: If it turns out that the insn isn't deletable,
3999 then we may have unnecessarily extended register lifetimes
4000 and made things worse. */
4008 /* Forward propagate copies. This includes copies and constants. Return
4009 non-zero if a change was made. */
4018 /* Note we start at block 1. */
4021 for (bb
= 1; bb
< n_basic_blocks
; bb
++)
4023 /* Reset tables used to keep track of what's still valid [since the
4024 start of the block]. */
4025 reset_opr_set_tables ();
4027 for (insn
= BLOCK_HEAD (bb
);
4028 insn
!= NULL
&& insn
!= NEXT_INSN (BLOCK_END (bb
));
4029 insn
= NEXT_INSN (insn
))
4033 changed
|= cprop_insn (insn
, alter_jumps
);
4035 /* Keep track of everything modified by this insn. */
4036 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4037 call mark_oprs_set if we turned the insn into a NOTE. */
4038 if (GET_CODE (insn
) != NOTE
)
4039 mark_oprs_set (insn
);
4044 if (gcse_file
!= NULL
)
4045 fprintf (gcse_file
, "\n");
4050 /* Perform one copy/constant propagation pass.
4051 F is the first insn in the function.
4052 PASS is the pass count. */
4055 one_cprop_pass (pass
, alter_jumps
)
4061 const_prop_count
= 0;
4062 copy_prop_count
= 0;
4064 alloc_set_hash_table (max_cuid
);
4065 compute_set_hash_table ();
4067 dump_hash_table (gcse_file
, "SET", set_hash_table
, set_hash_table_size
,
4071 alloc_cprop_mem (n_basic_blocks
, n_sets
);
4072 compute_cprop_data ();
4073 changed
= cprop (alter_jumps
);
4077 free_set_hash_table ();
4081 fprintf (gcse_file
, "CPROP of %s, pass %d: %d bytes needed, ",
4082 current_function_name
, pass
, bytes_used
);
4083 fprintf (gcse_file
, "%d const props, %d copy props\n\n",
4084 const_prop_count
, copy_prop_count
);
4090 /* Compute PRE+LCM working variables. */
4092 /* Local properties of expressions. */
4093 /* Nonzero for expressions that are transparent in the block. */
4094 static sbitmap
*transp
;
4096 /* Nonzero for expressions that are transparent at the end of the block.
4097 This is only zero for expressions killed by abnormal critical edge
4098 created by a calls. */
4099 static sbitmap
*transpout
;
4101 /* Nonzero for expressions that are computed (available) in the block. */
4102 static sbitmap
*comp
;
4104 /* Nonzero for expressions that are locally anticipatable in the block. */
4105 static sbitmap
*antloc
;
4107 /* Nonzero for expressions where this block is an optimal computation
4109 static sbitmap
*pre_optimal
;
4111 /* Nonzero for expressions which are redundant in a particular block. */
4112 static sbitmap
*pre_redundant
;
4114 /* Nonzero for expressions which should be inserted on a specific edge. */
4115 static sbitmap
*pre_insert_map
;
4117 /* Nonzero for expressions which should be deleted in a specific block. */
4118 static sbitmap
*pre_delete_map
;
4120 /* Contains the edge_list returned by pre_edge_lcm. */
4121 static struct edge_list
*edge_list
;
4123 /* Redundant insns. */
4124 static sbitmap pre_redundant_insns
;
4126 /* Allocate vars used for PRE analysis. */
4129 alloc_pre_mem (n_blocks
, n_exprs
)
4130 int n_blocks
, n_exprs
;
4132 transp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4133 comp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4134 antloc
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4137 pre_redundant
= NULL
;
4138 pre_insert_map
= NULL
;
4139 pre_delete_map
= NULL
;
4142 ae_kill
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4144 /* pre_insert and pre_delete are allocated later. */
4147 /* Free vars used for PRE analysis. */
4155 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
4160 free (pre_redundant
);
4162 free (pre_insert_map
);
4164 free (pre_delete_map
);
4171 transp
= comp
= NULL
;
4172 pre_optimal
= pre_redundant
= pre_insert_map
= pre_delete_map
= NULL
;
4173 ae_in
= ae_out
= NULL
;
4176 /* Top level routine to do the dataflow analysis needed by PRE. */
4181 sbitmap trapping_expr
;
4185 compute_local_properties (transp
, comp
, antloc
, 0);
4186 sbitmap_vector_zero (ae_kill
, n_basic_blocks
);
4188 /* Collect expressions which might trap. */
4189 trapping_expr
= sbitmap_alloc (n_exprs
);
4190 sbitmap_zero (trapping_expr
);
4191 for (ui
= 0; ui
< expr_hash_table_size
; ui
++)
4194 for (e
= expr_hash_table
[ui
]; e
!= NULL
; e
= e
->next_same_hash
)
4195 if (may_trap_p (e
->expr
))
4196 SET_BIT (trapping_expr
, e
->bitmap_index
);
4199 /* Compute ae_kill for each basic block using:
4203 This is significantly faster than compute_ae_kill. */
4205 for (i
= 0; i
< n_basic_blocks
; i
++)
4209 /* If the current block is the destination of an abnormal edge, we
4210 kill all trapping expressions because we won't be able to properly
4211 place the instruction on the edge. So make them neither
4212 anticipatable nor transparent. This is fairly conservative. */
4213 for (e
= BASIC_BLOCK (i
)->pred
; e
; e
= e
->pred_next
)
4214 if (e
->flags
& EDGE_ABNORMAL
)
4216 sbitmap_difference (antloc
[i
], antloc
[i
], trapping_expr
);
4217 sbitmap_difference (transp
[i
], transp
[i
], trapping_expr
);
4221 sbitmap_a_or_b (ae_kill
[i
], transp
[i
], comp
[i
]);
4222 sbitmap_not (ae_kill
[i
], ae_kill
[i
]);
4225 edge_list
= pre_edge_lcm (gcse_file
, n_exprs
, transp
, comp
, antloc
,
4226 ae_kill
, &pre_insert_map
, &pre_delete_map
);
4231 free (trapping_expr
);
4236 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4239 VISITED is a pointer to a working buffer for tracking which BB's have
4240 been visited. It is NULL for the top-level call.
4242 We treat reaching expressions that go through blocks containing the same
4243 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4244 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4245 2 as not reaching. The intent is to improve the probability of finding
4246 only one reaching expression and to reduce register lifetimes by picking
4247 the closest such expression. */
4250 pre_expr_reaches_here_p_work (occr_bb
, expr
, bb
, visited
)
4258 for (pred
= BASIC_BLOCK (bb
)->pred
; pred
!= NULL
; pred
= pred
->pred_next
)
4260 int pred_bb
= pred
->src
->index
;
4262 if (pred
->src
== ENTRY_BLOCK_PTR
4263 /* Has predecessor has already been visited? */
4264 || visited
[pred_bb
])
4265 ;/* Nothing to do. */
4267 /* Does this predecessor generate this expression? */
4268 else if (TEST_BIT (comp
[pred_bb
], expr
->bitmap_index
))
4270 /* Is this the occurrence we're looking for?
4271 Note that there's only one generating occurrence per block
4272 so we just need to check the block number. */
4273 if (occr_bb
== pred_bb
)
4276 visited
[pred_bb
] = 1;
4278 /* Ignore this predecessor if it kills the expression. */
4279 else if (! TEST_BIT (transp
[pred_bb
], expr
->bitmap_index
))
4280 visited
[pred_bb
] = 1;
4282 /* Neither gen nor kill. */
4285 visited
[pred_bb
] = 1;
4286 if (pre_expr_reaches_here_p_work (occr_bb
, expr
, pred_bb
, visited
))
4291 /* All paths have been checked. */
4295 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4296 memory allocated for that function is returned. */
4299 pre_expr_reaches_here_p (occr_bb
, expr
, bb
)
4305 char *visited
= (char *) xcalloc (n_basic_blocks
, 1);
4307 rval
= pre_expr_reaches_here_p_work(occr_bb
, expr
, bb
, visited
);
4314 /* Given an expr, generate RTL which we can insert at the end of a BB,
4315 or on an edge. Set the block number of any insns generated to
4319 process_insert_insn (expr
)
4322 rtx reg
= expr
->reaching_reg
;
4323 rtx pat
, copied_expr
;
4327 copied_expr
= copy_rtx (expr
->expr
);
4328 emit_move_insn (reg
, copied_expr
);
4329 first_new_insn
= get_insns ();
4330 pat
= gen_sequence ();
4336 /* Add EXPR to the end of basic block BB.
4338 This is used by both the PRE and code hoisting.
4340 For PRE, we want to verify that the expr is either transparent
4341 or locally anticipatable in the target block. This check makes
4342 no sense for code hoisting. */
4345 insert_insn_end_bb (expr
, bb
, pre
)
4350 rtx insn
= BLOCK_END (bb
);
4352 rtx reg
= expr
->reaching_reg
;
4353 int regno
= REGNO (reg
);
4357 pat
= process_insert_insn (expr
);
4359 /* If the last insn is a jump, insert EXPR in front [taking care to
4360 handle cc0, etc. properly]. */
4362 if (GET_CODE (insn
) == JUMP_INSN
)
4368 /* If this is a jump table, then we can't insert stuff here. Since
4369 we know the previous real insn must be the tablejump, we insert
4370 the new instruction just before the tablejump. */
4371 if (GET_CODE (PATTERN (insn
)) == ADDR_VEC
4372 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
)
4373 insn
= prev_real_insn (insn
);
4376 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4377 if cc0 isn't set. */
4378 note
= find_reg_note (insn
, REG_CC_SETTER
, NULL_RTX
);
4380 insn
= XEXP (note
, 0);
4383 rtx maybe_cc0_setter
= prev_nonnote_insn (insn
);
4384 if (maybe_cc0_setter
4385 && INSN_P (maybe_cc0_setter
)
4386 && sets_cc0_p (PATTERN (maybe_cc0_setter
)))
4387 insn
= maybe_cc0_setter
;
4390 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4391 new_insn
= emit_block_insn_before (pat
, insn
, BASIC_BLOCK (bb
));
4394 /* Likewise if the last insn is a call, as will happen in the presence
4395 of exception handling. */
4396 else if (GET_CODE (insn
) == CALL_INSN
)
4398 HARD_REG_SET parm_regs
;
4402 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4403 we search backward and place the instructions before the first
4404 parameter is loaded. Do this for everyone for consistency and a
4405 presumtion that we'll get better code elsewhere as well.
4407 It should always be the case that we can put these instructions
4408 anywhere in the basic block with performing PRE optimizations.
4412 && !TEST_BIT (antloc
[bb
], expr
->bitmap_index
)
4413 && !TEST_BIT (transp
[bb
], expr
->bitmap_index
))
4416 /* Since different machines initialize their parameter registers
4417 in different orders, assume nothing. Collect the set of all
4418 parameter registers. */
4419 CLEAR_HARD_REG_SET (parm_regs
);
4421 for (p
= CALL_INSN_FUNCTION_USAGE (insn
); p
; p
= XEXP (p
, 1))
4422 if (GET_CODE (XEXP (p
, 0)) == USE
4423 && GET_CODE (XEXP (XEXP (p
, 0), 0)) == REG
)
4425 if (REGNO (XEXP (XEXP (p
, 0), 0)) >= FIRST_PSEUDO_REGISTER
)
4428 SET_HARD_REG_BIT (parm_regs
, REGNO (XEXP (XEXP (p
, 0), 0)));
4432 /* Search backward for the first set of a register in this set. */
4433 while (nparm_regs
&& BLOCK_HEAD (bb
) != insn
)
4435 insn
= PREV_INSN (insn
);
4436 p
= single_set (insn
);
4437 if (p
&& GET_CODE (SET_DEST (p
)) == REG
4438 && REGNO (SET_DEST (p
)) < FIRST_PSEUDO_REGISTER
4439 && TEST_HARD_REG_BIT (parm_regs
, REGNO (SET_DEST (p
))))
4441 CLEAR_HARD_REG_BIT (parm_regs
, REGNO (SET_DEST (p
)));
4446 /* If we found all the parameter loads, then we want to insert
4447 before the first parameter load.
4449 If we did not find all the parameter loads, then we might have
4450 stopped on the head of the block, which could be a CODE_LABEL.
4451 If we inserted before the CODE_LABEL, then we would be putting
4452 the insn in the wrong basic block. In that case, put the insn
4453 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4454 while (GET_CODE (insn
) == CODE_LABEL
4455 || NOTE_INSN_BASIC_BLOCK_P (insn
))
4456 insn
= NEXT_INSN (insn
);
4458 new_insn
= emit_block_insn_before (pat
, insn
, BASIC_BLOCK (bb
));
4462 new_insn
= emit_insn_after (pat
, insn
);
4463 BLOCK_END (bb
) = new_insn
;
4466 /* Keep block number table up to date.
4467 Note, PAT could be a multiple insn sequence, we have to make
4468 sure that each insn in the sequence is handled. */
4469 if (GET_CODE (pat
) == SEQUENCE
)
4471 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
4473 rtx insn
= XVECEXP (pat
, 0, i
);
4475 set_block_num (insn
, bb
);
4477 add_label_notes (PATTERN (insn
), new_insn
);
4479 note_stores (PATTERN (insn
), record_set_info
, insn
);
4484 add_label_notes (SET_SRC (pat
), new_insn
);
4485 set_block_num (new_insn
, bb
);
4487 /* Keep register set table up to date. */
4488 record_one_set (regno
, new_insn
);
4491 gcse_create_count
++;
4495 fprintf (gcse_file
, "PRE/HOIST: end of bb %d, insn %d, ",
4496 bb
, INSN_UID (new_insn
));
4497 fprintf (gcse_file
, "copying expression %d to reg %d\n",
4498 expr
->bitmap_index
, regno
);
4502 /* Insert partially redundant expressions on edges in the CFG to make
4503 the expressions fully redundant. */
4506 pre_edge_insert (edge_list
, index_map
)
4507 struct edge_list
*edge_list
;
4508 struct expr
**index_map
;
4510 int e
, i
, j
, num_edges
, set_size
, did_insert
= 0;
4513 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4514 if it reaches any of the deleted expressions. */
4516 set_size
= pre_insert_map
[0]->size
;
4517 num_edges
= NUM_EDGES (edge_list
);
4518 inserted
= sbitmap_vector_alloc (num_edges
, n_exprs
);
4519 sbitmap_vector_zero (inserted
, num_edges
);
4521 for (e
= 0; e
< num_edges
; e
++)
4524 basic_block pred
= INDEX_EDGE_PRED_BB (edge_list
, e
);
4525 int bb
= pred
->index
;
4527 for (i
= indx
= 0; i
< set_size
; i
++, indx
+= SBITMAP_ELT_BITS
)
4529 SBITMAP_ELT_TYPE insert
= pre_insert_map
[e
]->elms
[i
];
4531 for (j
= indx
; insert
&& j
< n_exprs
; j
++, insert
>>= 1)
4532 if ((insert
& 1) != 0 && index_map
[j
]->reaching_reg
!= NULL_RTX
)
4534 struct expr
*expr
= index_map
[j
];
4537 /* Now look at each deleted occurence of this expression. */
4538 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
4540 if (! occr
->deleted_p
)
4543 /* Insert this expression on this edge if if it would
4544 reach the deleted occurence in BB. */
4545 if (!TEST_BIT (inserted
[e
], j
))
4548 edge eg
= INDEX_EDGE (edge_list
, e
);
4550 /* We can't insert anything on an abnormal and
4551 critical edge, so we insert the insn at the end of
4552 the previous block. There are several alternatives
4553 detailed in Morgans book P277 (sec 10.5) for
4554 handling this situation. This one is easiest for
4557 if ((eg
->flags
& EDGE_ABNORMAL
) == EDGE_ABNORMAL
)
4558 insert_insn_end_bb (index_map
[j
], bb
, 0);
4561 insn
= process_insert_insn (index_map
[j
]);
4562 insert_insn_on_edge (insn
, eg
);
4567 fprintf (gcse_file
, "PRE/HOIST: edge (%d,%d), ",
4569 INDEX_EDGE_SUCC_BB (edge_list
, e
)->index
);
4570 fprintf (gcse_file
, "copy expression %d\n",
4571 expr
->bitmap_index
);
4574 SET_BIT (inserted
[e
], j
);
4576 gcse_create_count
++;
4587 /* Copy the result of INSN to REG. INDX is the expression number. */
4590 pre_insert_copy_insn (expr
, insn
)
4594 rtx reg
= expr
->reaching_reg
;
4595 int regno
= REGNO (reg
);
4596 int indx
= expr
->bitmap_index
;
4597 rtx set
= single_set (insn
);
4599 int bb
= BLOCK_NUM (insn
);
4604 new_insn
= emit_insn_after (gen_rtx_SET (VOIDmode
, reg
, SET_DEST (set
)),
4607 /* Keep block number table up to date. */
4608 set_block_num (new_insn
, bb
);
4610 /* Keep register set table up to date. */
4611 record_one_set (regno
, new_insn
);
4612 if (insn
== BLOCK_END (bb
))
4613 BLOCK_END (bb
) = new_insn
;
4615 gcse_create_count
++;
4619 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4620 BLOCK_NUM (insn
), INSN_UID (new_insn
), indx
,
4621 INSN_UID (insn
), regno
);
4624 /* Copy available expressions that reach the redundant expression
4625 to `reaching_reg'. */
4628 pre_insert_copies ()
4635 /* For each available expression in the table, copy the result to
4636 `reaching_reg' if the expression reaches a deleted one.
4638 ??? The current algorithm is rather brute force.
4639 Need to do some profiling. */
4641 for (i
= 0; i
< expr_hash_table_size
; i
++)
4642 for (expr
= expr_hash_table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
4644 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4645 we don't want to insert a copy here because the expression may not
4646 really be redundant. So only insert an insn if the expression was
4647 deleted. This test also avoids further processing if the
4648 expression wasn't deleted anywhere. */
4649 if (expr
->reaching_reg
== NULL
)
4652 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
4654 if (! occr
->deleted_p
)
4657 for (avail
= expr
->avail_occr
; avail
!= NULL
; avail
= avail
->next
)
4659 rtx insn
= avail
->insn
;
4661 /* No need to handle this one if handled already. */
4662 if (avail
->copied_p
)
4665 /* Don't handle this one if it's a redundant one. */
4666 if (TEST_BIT (pre_redundant_insns
, INSN_CUID (insn
)))
4669 /* Or if the expression doesn't reach the deleted one. */
4670 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail
->insn
), expr
,
4671 BLOCK_NUM (occr
->insn
)))
4674 /* Copy the result of avail to reaching_reg. */
4675 pre_insert_copy_insn (expr
, insn
);
4676 avail
->copied_p
= 1;
4682 /* Delete redundant computations.
4683 Deletion is done by changing the insn to copy the `reaching_reg' of
4684 the expression into the result of the SET. It is left to later passes
4685 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4687 Returns non-zero if a change is made. */
4698 for (i
= 0; i
< expr_hash_table_size
; i
++)
4699 for (expr
= expr_hash_table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
4701 int indx
= expr
->bitmap_index
;
4703 /* We only need to search antic_occr since we require
4706 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
4708 rtx insn
= occr
->insn
;
4710 int bb
= BLOCK_NUM (insn
);
4712 if (TEST_BIT (pre_delete_map
[bb
], indx
))
4714 set
= single_set (insn
);
4718 /* Create a pseudo-reg to store the result of reaching
4719 expressions into. Get the mode for the new pseudo from
4720 the mode of the original destination pseudo. */
4721 if (expr
->reaching_reg
== NULL
)
4723 = gen_reg_rtx (GET_MODE (SET_DEST (set
)));
4725 /* In theory this should never fail since we're creating
4728 However, on the x86 some of the movXX patterns actually
4729 contain clobbers of scratch regs. This may cause the
4730 insn created by validate_change to not match any pattern
4731 and thus cause validate_change to fail. */
4732 if (validate_change (insn
, &SET_SRC (set
),
4733 expr
->reaching_reg
, 0))
4735 occr
->deleted_p
= 1;
4736 SET_BIT (pre_redundant_insns
, INSN_CUID (insn
));
4744 "PRE: redundant insn %d (expression %d) in ",
4745 INSN_UID (insn
), indx
);
4746 fprintf (gcse_file
, "bb %d, reaching reg is %d\n",
4747 bb
, REGNO (expr
->reaching_reg
));
4756 /* Perform GCSE optimizations using PRE.
4757 This is called by one_pre_gcse_pass after all the dataflow analysis
4760 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4761 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4762 Compiler Design and Implementation.
4764 ??? A new pseudo reg is created to hold the reaching expression. The nice
4765 thing about the classical approach is that it would try to use an existing
4766 reg. If the register can't be adequately optimized [i.e. we introduce
4767 reload problems], one could add a pass here to propagate the new register
4770 ??? We don't handle single sets in PARALLELs because we're [currently] not
4771 able to copy the rest of the parallel when we insert copies to create full
4772 redundancies from partial redundancies. However, there's no reason why we
4773 can't handle PARALLELs in the cases where there are no partial
4780 int did_insert
, changed
;
4781 struct expr
**index_map
;
4784 /* Compute a mapping from expression number (`bitmap_index') to
4785 hash table entry. */
4787 index_map
= (struct expr
**) xcalloc (n_exprs
, sizeof (struct expr
*));
4788 for (i
= 0; i
< expr_hash_table_size
; i
++)
4789 for (expr
= expr_hash_table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
4790 index_map
[expr
->bitmap_index
] = expr
;
4792 /* Reset bitmap used to track which insns are redundant. */
4793 pre_redundant_insns
= sbitmap_alloc (max_cuid
);
4794 sbitmap_zero (pre_redundant_insns
);
4796 /* Delete the redundant insns first so that
4797 - we know what register to use for the new insns and for the other
4798 ones with reaching expressions
4799 - we know which insns are redundant when we go to create copies */
4801 changed
= pre_delete ();
4803 did_insert
= pre_edge_insert (edge_list
, index_map
);
4805 /* In other places with reaching expressions, copy the expression to the
4806 specially allocated pseudo-reg that reaches the redundant expr. */
4807 pre_insert_copies ();
4810 commit_edge_insertions ();
4815 free (pre_redundant_insns
);
4819 /* Top level routine to perform one PRE GCSE pass.
4821 Return non-zero if a change was made. */
4824 one_pre_gcse_pass (pass
)
4829 gcse_subst_count
= 0;
4830 gcse_create_count
= 0;
4832 alloc_expr_hash_table (max_cuid
);
4833 add_noreturn_fake_exit_edges ();
4834 compute_expr_hash_table ();
4836 dump_hash_table (gcse_file
, "Expression", expr_hash_table
,
4837 expr_hash_table_size
, n_exprs
);
4841 alloc_pre_mem (n_basic_blocks
, n_exprs
);
4842 compute_pre_data ();
4843 changed
|= pre_gcse ();
4844 free_edge_list (edge_list
);
4848 remove_fake_edges ();
4849 free_expr_hash_table ();
4853 fprintf (gcse_file
, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4854 current_function_name
, pass
, bytes_used
);
4855 fprintf (gcse_file
, "%d substs, %d insns created\n",
4856 gcse_subst_count
, gcse_create_count
);
4862 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4863 If notes are added to an insn which references a CODE_LABEL, the
4864 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
4865 because the following loop optimization pass requires them. */
4867 /* ??? This is very similar to the loop.c add_label_notes function. We
4868 could probably share code here. */
4870 /* ??? If there was a jump optimization pass after gcse and before loop,
4871 then we would not need to do this here, because jump would add the
4872 necessary REG_LABEL notes. */
4875 add_label_notes (x
, insn
)
4879 enum rtx_code code
= GET_CODE (x
);
4883 if (code
== LABEL_REF
&& !LABEL_REF_NONLOCAL_P (x
))
4885 /* This code used to ignore labels that referred to dispatch tables to
4886 avoid flow generating (slighly) worse code.
4888 We no longer ignore such label references (see LABEL_REF handling in
4889 mark_jump_label for additional information). */
4891 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_LABEL
, XEXP (x
, 0),
4893 if (LABEL_P (XEXP (x
, 0)))
4894 LABEL_NUSES (XEXP (x
, 0))++;
4898 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
4901 add_label_notes (XEXP (x
, i
), insn
);
4902 else if (fmt
[i
] == 'E')
4903 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4904 add_label_notes (XVECEXP (x
, i
, j
), insn
);
4908 /* Compute transparent outgoing information for each block.
4910 An expression is transparent to an edge unless it is killed by
4911 the edge itself. This can only happen with abnormal control flow,
4912 when the edge is traversed through a call. This happens with
4913 non-local labels and exceptions.
4915 This would not be necessary if we split the edge. While this is
4916 normally impossible for abnormal critical edges, with some effort
4917 it should be possible with exception handling, since we still have
4918 control over which handler should be invoked. But due to increased
4919 EH table sizes, this may not be worthwhile. */
4922 compute_transpout ()
4928 sbitmap_vector_ones (transpout
, n_basic_blocks
);
4930 for (bb
= 0; bb
< n_basic_blocks
; ++bb
)
4932 /* Note that flow inserted a nop a the end of basic blocks that
4933 end in call instructions for reasons other than abnormal
4935 if (GET_CODE (BLOCK_END (bb
)) != CALL_INSN
)
4938 for (i
= 0; i
< expr_hash_table_size
; i
++)
4939 for (expr
= expr_hash_table
[i
]; expr
; expr
= expr
->next_same_hash
)
4940 if (GET_CODE (expr
->expr
) == MEM
)
4942 if (GET_CODE (XEXP (expr
->expr
, 0)) == SYMBOL_REF
4943 && CONSTANT_POOL_ADDRESS_P (XEXP (expr
->expr
, 0)))
4946 /* ??? Optimally, we would use interprocedural alias
4947 analysis to determine if this mem is actually killed
4949 RESET_BIT (transpout
[bb
], expr
->bitmap_index
);
4954 /* Removal of useless null pointer checks */
4956 /* Called via note_stores. X is set by SETTER. If X is a register we must
4957 invalidate nonnull_local and set nonnull_killed. DATA is really a
4958 `null_pointer_info *'.
4960 We ignore hard registers. */
4963 invalidate_nonnull_info (x
, setter
, data
)
4965 rtx setter ATTRIBUTE_UNUSED
;
4969 struct null_pointer_info
*npi
= (struct null_pointer_info
*) data
;
4971 while (GET_CODE (x
) == SUBREG
)
4974 /* Ignore anything that is not a register or is a hard register. */
4975 if (GET_CODE (x
) != REG
4976 || REGNO (x
) < npi
->min_reg
4977 || REGNO (x
) >= npi
->max_reg
)
4980 regno
= REGNO (x
) - npi
->min_reg
;
4982 RESET_BIT (npi
->nonnull_local
[npi
->current_block
], regno
);
4983 SET_BIT (npi
->nonnull_killed
[npi
->current_block
], regno
);
4986 /* Do null-pointer check elimination for the registers indicated in
4987 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4988 they are not our responsibility to free. */
4991 delete_null_pointer_checks_1 (delete_list
, block_reg
, nonnull_avin
,
4993 varray_type
*delete_list
;
4994 unsigned int *block_reg
;
4995 sbitmap
*nonnull_avin
;
4996 sbitmap
*nonnull_avout
;
4997 struct null_pointer_info
*npi
;
5001 sbitmap
*nonnull_local
= npi
->nonnull_local
;
5002 sbitmap
*nonnull_killed
= npi
->nonnull_killed
;
5004 /* Compute local properties, nonnull and killed. A register will have
5005 the nonnull property if at the end of the current block its value is
5006 known to be nonnull. The killed property indicates that somewhere in
5007 the block any information we had about the register is killed.
5009 Note that a register can have both properties in a single block. That
5010 indicates that it's killed, then later in the block a new value is
5012 sbitmap_vector_zero (nonnull_local
, n_basic_blocks
);
5013 sbitmap_vector_zero (nonnull_killed
, n_basic_blocks
);
5015 for (current_block
= 0; current_block
< n_basic_blocks
; current_block
++)
5017 rtx insn
, stop_insn
;
5019 /* Set the current block for invalidate_nonnull_info. */
5020 npi
->current_block
= current_block
;
5022 /* Scan each insn in the basic block looking for memory references and
5024 stop_insn
= NEXT_INSN (BLOCK_END (current_block
));
5025 for (insn
= BLOCK_HEAD (current_block
);
5027 insn
= NEXT_INSN (insn
))
5032 /* Ignore anything that is not a normal insn. */
5033 if (! INSN_P (insn
))
5036 /* Basically ignore anything that is not a simple SET. We do have
5037 to make sure to invalidate nonnull_local and set nonnull_killed
5038 for such insns though. */
5039 set
= single_set (insn
);
5042 note_stores (PATTERN (insn
), invalidate_nonnull_info
, npi
);
5046 /* See if we've got a useable memory load. We handle it first
5047 in case it uses its address register as a dest (which kills
5048 the nonnull property). */
5049 if (GET_CODE (SET_SRC (set
)) == MEM
5050 && GET_CODE ((reg
= XEXP (SET_SRC (set
), 0))) == REG
5051 && REGNO (reg
) >= npi
->min_reg
5052 && REGNO (reg
) < npi
->max_reg
)
5053 SET_BIT (nonnull_local
[current_block
],
5054 REGNO (reg
) - npi
->min_reg
);
5056 /* Now invalidate stuff clobbered by this insn. */
5057 note_stores (PATTERN (insn
), invalidate_nonnull_info
, npi
);
5059 /* And handle stores, we do these last since any sets in INSN can
5060 not kill the nonnull property if it is derived from a MEM
5061 appearing in a SET_DEST. */
5062 if (GET_CODE (SET_DEST (set
)) == MEM
5063 && GET_CODE ((reg
= XEXP (SET_DEST (set
), 0))) == REG
5064 && REGNO (reg
) >= npi
->min_reg
5065 && REGNO (reg
) < npi
->max_reg
)
5066 SET_BIT (nonnull_local
[current_block
],
5067 REGNO (reg
) - npi
->min_reg
);
5071 /* Now compute global properties based on the local properties. This
5072 is a classic global availablity algorithm. */
5073 compute_available (nonnull_local
, nonnull_killed
,
5074 nonnull_avout
, nonnull_avin
);
5076 /* Now look at each bb and see if it ends with a compare of a value
5078 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
5080 rtx last_insn
= BLOCK_END (bb
);
5081 rtx condition
, earliest
;
5082 int compare_and_branch
;
5084 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5085 since BLOCK_REG[BB] is zero if this block did not end with a
5086 comparison against zero, this condition works. */
5087 if (block_reg
[bb
] < npi
->min_reg
5088 || block_reg
[bb
] >= npi
->max_reg
)
5091 /* LAST_INSN is a conditional jump. Get its condition. */
5092 condition
= get_condition (last_insn
, &earliest
);
5094 /* If we can't determine the condition then skip. */
5098 /* Is the register known to have a nonzero value? */
5099 if (!TEST_BIT (nonnull_avout
[bb
], block_reg
[bb
] - npi
->min_reg
))
5102 /* Try to compute whether the compare/branch at the loop end is one or
5103 two instructions. */
5104 if (earliest
== last_insn
)
5105 compare_and_branch
= 1;
5106 else if (earliest
== prev_nonnote_insn (last_insn
))
5107 compare_and_branch
= 2;
5111 /* We know the register in this comparison is nonnull at exit from
5112 this block. We can optimize this comparison. */
5113 if (GET_CODE (condition
) == NE
)
5117 new_jump
= emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn
)),
5119 JUMP_LABEL (new_jump
) = JUMP_LABEL (last_insn
);
5120 LABEL_NUSES (JUMP_LABEL (new_jump
))++;
5121 emit_barrier_after (new_jump
);
5124 VARRAY_RTX_INIT (*delete_list
, 10, "delete_list");
5126 VARRAY_PUSH_RTX (*delete_list
, last_insn
);
5127 if (compare_and_branch
== 2)
5128 VARRAY_PUSH_RTX (*delete_list
, earliest
);
5130 /* Don't check this block again. (Note that BLOCK_END is
5131 invalid here; we deleted the last instruction in the
5137 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5140 This is conceptually similar to global constant/copy propagation and
5141 classic global CSE (it even uses the same dataflow equations as cprop).
5143 If a register is used as memory address with the form (mem (reg)), then we
5144 know that REG can not be zero at that point in the program. Any instruction
5145 which sets REG "kills" this property.
5147 So, if every path leading to a conditional branch has an available memory
5148 reference of that form, then we know the register can not have the value
5149 zero at the conditional branch.
5151 So we merely need to compute the local properies and propagate that data
5152 around the cfg, then optimize where possible.
5154 We run this pass two times. Once before CSE, then again after CSE. This
5155 has proven to be the most profitable approach. It is rare for new
5156 optimization opportunities of this nature to appear after the first CSE
5159 This could probably be integrated with global cprop with a little work. */
5162 delete_null_pointer_checks (f
)
5163 rtx f ATTRIBUTE_UNUSED
;
5165 sbitmap
*nonnull_avin
, *nonnull_avout
;
5166 unsigned int *block_reg
;
5167 varray_type delete_list
= NULL
;
5173 struct null_pointer_info npi
;
5175 /* If we have only a single block, then there's nothing to do. */
5176 if (n_basic_blocks
<= 1)
5179 /* Trying to perform global optimizations on flow graphs which have
5180 a high connectivity will take a long time and is unlikely to be
5181 particularly useful.
5183 In normal circumstances a cfg should have about twice as many edges
5184 as blocks. But we do not want to punish small functions which have
5185 a couple switch statements. So we require a relatively large number
5186 of basic blocks and the ratio of edges to blocks to be high. */
5187 if (n_basic_blocks
> 1000 && n_edges
/ n_basic_blocks
>= 20)
5190 /* We need four bitmaps, each with a bit for each register in each
5192 max_reg
= max_reg_num ();
5193 regs_per_pass
= get_bitmap_width (4, n_basic_blocks
, max_reg
);
5195 /* Allocate bitmaps to hold local and global properties. */
5196 npi
.nonnull_local
= sbitmap_vector_alloc (n_basic_blocks
, regs_per_pass
);
5197 npi
.nonnull_killed
= sbitmap_vector_alloc (n_basic_blocks
, regs_per_pass
);
5198 nonnull_avin
= sbitmap_vector_alloc (n_basic_blocks
, regs_per_pass
);
5199 nonnull_avout
= sbitmap_vector_alloc (n_basic_blocks
, regs_per_pass
);
5201 /* Go through the basic blocks, seeing whether or not each block
5202 ends with a conditional branch whose condition is a comparison
5203 against zero. Record the register compared in BLOCK_REG. */
5204 block_reg
= (unsigned int *) xcalloc (n_basic_blocks
, sizeof (int));
5205 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
5207 rtx last_insn
= BLOCK_END (bb
);
5208 rtx condition
, earliest
, reg
;
5210 /* We only want conditional branches. */
5211 if (GET_CODE (last_insn
) != JUMP_INSN
5212 || !any_condjump_p (last_insn
)
5213 || !onlyjump_p (last_insn
))
5216 /* LAST_INSN is a conditional jump. Get its condition. */
5217 condition
= get_condition (last_insn
, &earliest
);
5219 /* If we were unable to get the condition, or it is not a equality
5220 comparison against zero then there's nothing we can do. */
5222 || (GET_CODE (condition
) != NE
&& GET_CODE (condition
) != EQ
)
5223 || GET_CODE (XEXP (condition
, 1)) != CONST_INT
5224 || (XEXP (condition
, 1)
5225 != CONST0_RTX (GET_MODE (XEXP (condition
, 0)))))
5228 /* We must be checking a register against zero. */
5229 reg
= XEXP (condition
, 0);
5230 if (GET_CODE (reg
) != REG
)
5233 block_reg
[bb
] = REGNO (reg
);
5236 /* Go through the algorithm for each block of registers. */
5237 for (reg
= FIRST_PSEUDO_REGISTER
; reg
< max_reg
; reg
+= regs_per_pass
)
5240 npi
.max_reg
= MIN (reg
+ regs_per_pass
, max_reg
);
5241 delete_null_pointer_checks_1 (&delete_list
, block_reg
, nonnull_avin
,
5242 nonnull_avout
, &npi
);
5245 /* Now delete the instructions all at once. This breaks the CFG. */
5248 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (delete_list
); i
++)
5249 delete_insn (VARRAY_RTX (delete_list
, i
));
5250 VARRAY_FREE (delete_list
);
5253 /* Free the table of registers compared at the end of every block. */
5257 free (npi
.nonnull_local
);
5258 free (npi
.nonnull_killed
);
5259 free (nonnull_avin
);
5260 free (nonnull_avout
);
5263 /* Code Hoisting variables and subroutines. */
5265 /* Very busy expressions. */
5266 static sbitmap
*hoist_vbein
;
5267 static sbitmap
*hoist_vbeout
;
5269 /* Hoistable expressions. */
5270 static sbitmap
*hoist_exprs
;
5272 /* Dominator bitmaps. */
5273 static sbitmap
*dominators
;
5275 /* ??? We could compute post dominators and run this algorithm in
5276 reverse to to perform tail merging, doing so would probably be
5277 more effective than the tail merging code in jump.c.
5279 It's unclear if tail merging could be run in parallel with
5280 code hoisting. It would be nice. */
5282 /* Allocate vars used for code hoisting analysis. */
5285 alloc_code_hoist_mem (n_blocks
, n_exprs
)
5286 int n_blocks
, n_exprs
;
5288 antloc
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
5289 transp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
5290 comp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
5292 hoist_vbein
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
5293 hoist_vbeout
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
5294 hoist_exprs
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
5295 transpout
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
5297 dominators
= sbitmap_vector_alloc (n_blocks
, n_blocks
);
5300 /* Free vars used for code hoisting analysis. */
5303 free_code_hoist_mem ()
5310 free (hoist_vbeout
);
5317 /* Compute the very busy expressions at entry/exit from each block.
5319 An expression is very busy if all paths from a given point
5320 compute the expression. */
5323 compute_code_hoist_vbeinout ()
5325 int bb
, changed
, passes
;
5327 sbitmap_vector_zero (hoist_vbeout
, n_basic_blocks
);
5328 sbitmap_vector_zero (hoist_vbein
, n_basic_blocks
);
5337 /* We scan the blocks in the reverse order to speed up
5339 for (bb
= n_basic_blocks
- 1; bb
>= 0; bb
--)
5341 changed
|= sbitmap_a_or_b_and_c (hoist_vbein
[bb
], antloc
[bb
],
5342 hoist_vbeout
[bb
], transp
[bb
]);
5343 if (bb
!= n_basic_blocks
- 1)
5344 sbitmap_intersection_of_succs (hoist_vbeout
[bb
], hoist_vbein
, bb
);
5351 fprintf (gcse_file
, "hoisting vbeinout computation: %d passes\n", passes
);
5354 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5357 compute_code_hoist_data ()
5359 compute_local_properties (transp
, comp
, antloc
, 0);
5360 compute_transpout ();
5361 compute_code_hoist_vbeinout ();
5362 calculate_dominance_info (NULL
, dominators
, CDI_DOMINATORS
);
5364 fprintf (gcse_file
, "\n");
5367 /* Determine if the expression identified by EXPR_INDEX would
5368 reach BB unimpared if it was placed at the end of EXPR_BB.
5370 It's unclear exactly what Muchnick meant by "unimpared". It seems
5371 to me that the expression must either be computed or transparent in
5372 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5373 would allow the expression to be hoisted out of loops, even if
5374 the expression wasn't a loop invariant.
5376 Contrast this to reachability for PRE where an expression is
5377 considered reachable if *any* path reaches instead of *all*
5381 hoist_expr_reaches_here_p (expr_bb
, expr_index
, bb
, visited
)
5388 int visited_allocated_locally
= 0;
5391 if (visited
== NULL
)
5393 visited_allocated_locally
= 1;
5394 visited
= xcalloc (n_basic_blocks
, 1);
5397 for (pred
= BASIC_BLOCK (bb
)->pred
; pred
!= NULL
; pred
= pred
->pred_next
)
5399 int pred_bb
= pred
->src
->index
;
5401 if (pred
->src
== ENTRY_BLOCK_PTR
)
5403 else if (visited
[pred_bb
])
5406 /* Does this predecessor generate this expression? */
5407 else if (TEST_BIT (comp
[pred_bb
], expr_index
))
5409 else if (! TEST_BIT (transp
[pred_bb
], expr_index
))
5415 visited
[pred_bb
] = 1;
5416 if (! hoist_expr_reaches_here_p (expr_bb
, expr_index
,
5421 if (visited_allocated_locally
)
5424 return (pred
== NULL
);
5427 /* Actually perform code hoisting. */
5434 struct expr
**index_map
;
5437 sbitmap_vector_zero (hoist_exprs
, n_basic_blocks
);
5439 /* Compute a mapping from expression number (`bitmap_index') to
5440 hash table entry. */
5442 index_map
= (struct expr
**) xcalloc (n_exprs
, sizeof (struct expr
*));
5443 for (i
= 0; i
< expr_hash_table_size
; i
++)
5444 for (expr
= expr_hash_table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
5445 index_map
[expr
->bitmap_index
] = expr
;
5447 /* Walk over each basic block looking for potentially hoistable
5448 expressions, nothing gets hoisted from the entry block. */
5449 for (bb
= 0; bb
< n_basic_blocks
; bb
++)
5452 int insn_inserted_p
;
5454 /* Examine each expression that is very busy at the exit of this
5455 block. These are the potentially hoistable expressions. */
5456 for (i
= 0; i
< hoist_vbeout
[bb
]->n_bits
; i
++)
5460 if (TEST_BIT (hoist_vbeout
[bb
], i
) && TEST_BIT (transpout
[bb
], i
))
5462 /* We've found a potentially hoistable expression, now
5463 we look at every block BB dominates to see if it
5464 computes the expression. */
5465 for (dominated
= 0; dominated
< n_basic_blocks
; dominated
++)
5467 /* Ignore self dominance. */
5469 || ! TEST_BIT (dominators
[dominated
], bb
))
5472 /* We've found a dominated block, now see if it computes
5473 the busy expression and whether or not moving that
5474 expression to the "beginning" of that block is safe. */
5475 if (!TEST_BIT (antloc
[dominated
], i
))
5478 /* Note if the expression would reach the dominated block
5479 unimpared if it was placed at the end of BB.
5481 Keep track of how many times this expression is hoistable
5482 from a dominated block into BB. */
5483 if (hoist_expr_reaches_here_p (bb
, i
, dominated
, NULL
))
5487 /* If we found more than one hoistable occurence of this
5488 expression, then note it in the bitmap of expressions to
5489 hoist. It makes no sense to hoist things which are computed
5490 in only one BB, and doing so tends to pessimize register
5491 allocation. One could increase this value to try harder
5492 to avoid any possible code expansion due to register
5493 allocation issues; however experiments have shown that
5494 the vast majority of hoistable expressions are only movable
5495 from two successors, so raising this threshhold is likely
5496 to nullify any benefit we get from code hoisting. */
5499 SET_BIT (hoist_exprs
[bb
], i
);
5505 /* If we found nothing to hoist, then quit now. */
5509 /* Loop over all the hoistable expressions. */
5510 for (i
= 0; i
< hoist_exprs
[bb
]->n_bits
; i
++)
5512 /* We want to insert the expression into BB only once, so
5513 note when we've inserted it. */
5514 insn_inserted_p
= 0;
5516 /* These tests should be the same as the tests above. */
5517 if (TEST_BIT (hoist_vbeout
[bb
], i
))
5519 /* We've found a potentially hoistable expression, now
5520 we look at every block BB dominates to see if it
5521 computes the expression. */
5522 for (dominated
= 0; dominated
< n_basic_blocks
; dominated
++)
5524 /* Ignore self dominance. */
5526 || ! TEST_BIT (dominators
[dominated
], bb
))
5529 /* We've found a dominated block, now see if it computes
5530 the busy expression and whether or not moving that
5531 expression to the "beginning" of that block is safe. */
5532 if (!TEST_BIT (antloc
[dominated
], i
))
5535 /* The expression is computed in the dominated block and
5536 it would be safe to compute it at the start of the
5537 dominated block. Now we have to determine if the
5538 expresion would reach the dominated block if it was
5539 placed at the end of BB. */
5540 if (hoist_expr_reaches_here_p (bb
, i
, dominated
, NULL
))
5542 struct expr
*expr
= index_map
[i
];
5543 struct occr
*occr
= expr
->antic_occr
;
5547 /* Find the right occurence of this expression. */
5548 while (BLOCK_NUM (occr
->insn
) != dominated
&& occr
)
5551 /* Should never happen. */
5557 set
= single_set (insn
);
5561 /* Create a pseudo-reg to store the result of reaching
5562 expressions into. Get the mode for the new pseudo
5563 from the mode of the original destination pseudo. */
5564 if (expr
->reaching_reg
== NULL
)
5566 = gen_reg_rtx (GET_MODE (SET_DEST (set
)));
5568 /* In theory this should never fail since we're creating
5571 However, on the x86 some of the movXX patterns
5572 actually contain clobbers of scratch regs. This may
5573 cause the insn created by validate_change to not
5574 match any pattern and thus cause validate_change to
5576 if (validate_change (insn
, &SET_SRC (set
),
5577 expr
->reaching_reg
, 0))
5579 occr
->deleted_p
= 1;
5580 if (!insn_inserted_p
)
5582 insert_insn_end_bb (index_map
[i
], bb
, 0);
5583 insn_inserted_p
= 1;
5595 /* Top level routine to perform one code hoisting (aka unification) pass
5597 Return non-zero if a change was made. */
5600 one_code_hoisting_pass ()
5604 alloc_expr_hash_table (max_cuid
);
5605 compute_expr_hash_table ();
5607 dump_hash_table (gcse_file
, "Code Hosting Expressions", expr_hash_table
,
5608 expr_hash_table_size
, n_exprs
);
5612 alloc_code_hoist_mem (n_basic_blocks
, n_exprs
);
5613 compute_code_hoist_data ();
5615 free_code_hoist_mem ();
5618 free_expr_hash_table ();