1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 - reordering of memory allocation and freeing to be more space efficient
25 - do rough calc of how many regs are needed in each block, and a rough
26 calc of how many regs are available in each class and use that to
27 throttle back the code in cases where RTX_COST is minimal.
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
33 recomputing it for each pass
37 /* References searched while implementing this.
39 Compilers Principles, Techniques and Tools
43 Global Optimization by Suppression of Partial Redundancies
45 communications of the acm, Vol. 22, Num. 2, Feb. 1979
47 A Portable Machine-Independent Global Optimizer - Design and Measurements
49 Stanford Ph.D. thesis, Dec. 1983
51 A Fast Algorithm for Code Movement Optimization
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
60 Practical Adaptation of the Global Optimization
61 Algorithm of Morel and Renvoise
63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
65 Efficiently Computing Static Single Assignment Form and the Control
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
71 J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
75 Time for Reducible Flow Control
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
109 Global code motion / global value numbering
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
113 Value Driven Redundancy Elimination
115 Rice University Ph.D. thesis, Apr. 1996
119 Massively Scalar Compiler Project, Rice University, Sep. 1996
121 High Performance Compilers for Parallel Computing
125 Advanced Compiler Design and Implementation
127 Morgan Kaufmann, 1997
129 Building an Optimizing Compiler
133 People wishing to speed up the code here should read:
134 Elimination Algorithms for Data Flow Analysis
135 B.G. Ryder, M.C. Paull
136 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
138 How to Analyze Large Programs Efficiently and Informatively
139 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
140 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
142 People wishing to do something different can find various possibilities
143 in the above papers and elsewhere.
148 #include "coretypes.h"
155 #include "hard-reg-set.h"
158 #include "insn-config.h"
160 #include "basic-block.h"
162 #include "function.h"
171 /* Propagate flow information through back edges and thus enable PRE's
172 moving loop invariant calculations out of loops.
174 Originally this tended to create worse overall code, but several
175 improvements during the development of PRE seem to have made following
176 back edges generally a win.
178 Note much of the loop invariant code motion done here would normally
179 be done by loop.c, which has more heuristics for when to move invariants
180 out of loops. At some point we might need to move some of those
181 heuristics into gcse.c. */
183 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
184 are a superset of those done by GCSE.
186 We perform the following steps:
188 1) Compute basic block information.
190 2) Compute table of places where registers are set.
192 3) Perform copy/constant propagation.
194 4) Perform global cse.
196 5) Perform another pass of copy/constant propagation.
198 Two passes of copy/constant propagation are done because the first one
199 enables more GCSE and the second one helps to clean up the copies that
200 GCSE creates. This is needed more for PRE than for Classic because Classic
201 GCSE will try to use an existing register containing the common
202 subexpression rather than create a new one. This is harder to do for PRE
203 because of the code motion (which Classic GCSE doesn't do).
205 Expressions we are interested in GCSE-ing are of the form
206 (set (pseudo-reg) (expression)).
207 Function want_to_gcse_p says what these are.
209 PRE handles moving invariant expressions out of loops (by treating them as
210 partially redundant).
212 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
213 assignment) based GVN (global value numbering). L. T. Simpson's paper
214 (Rice University) on value numbering is a useful reference for this.
216 **********************
218 We used to support multiple passes but there are diminishing returns in
219 doing so. The first pass usually makes 90% of the changes that are doable.
220 A second pass can make a few more changes made possible by the first pass.
221 Experiments show any further passes don't make enough changes to justify
224 A study of spec92 using an unlimited number of passes:
225 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
226 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
227 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
229 It was found doing copy propagation between each pass enables further
232 PRE is quite expensive in complicated functions because the DFA can take
233 awhile to converge. Hence we only perform one pass. The parameter max-gcse-passes can
234 be modified if one wants to experiment.
236 **********************
238 The steps for PRE are:
240 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
242 2) Perform the data flow analysis for PRE.
244 3) Delete the redundant instructions
246 4) Insert the required copies [if any] that make the partially
247 redundant instructions fully redundant.
249 5) For other reaching expressions, insert an instruction to copy the value
250 to a newly created pseudo that will reach the redundant instruction.
252 The deletion is done first so that when we do insertions we
253 know which pseudo reg to use.
255 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
256 argue it is not. The number of iterations for the algorithm to converge
257 is typically 2-4 so I don't view it as that expensive (relatively speaking).
259 PRE GCSE depends heavily on the second CSE pass to clean up the copies
260 we create. To make an expression reach the place where it's redundant,
261 the result of the expression is copied to a new register, and the redundant
262 expression is deleted by replacing it with this new register. Classic GCSE
263 doesn't have this problem as much as it computes the reaching defs of
264 each register in each block and thus can try to use an existing register.
266 **********************
268 A fair bit of simplicity is created by creating small functions for simple
269 tasks, even when the function is only called in one place. This may
270 measurably slow things down [or may not] by creating more function call
271 overhead than is necessary. The source is laid out so that it's trivial
272 to make the affected functions inline so that one can measure what speed
273 up, if any, can be achieved, and maybe later when things settle things can
276 Help stamp out big monolithic functions! */
278 /* GCSE global vars. */
281 static FILE *gcse_file
;
283 /* Note whether or not we should run jump optimization after gcse. We
284 want to do this for two cases.
286 * If we changed any jumps via cprop.
288 * If we added any labels via edge splitting. */
290 static int run_jump_opt_after_gcse
;
292 /* Bitmaps are normally not included in debugging dumps.
293 However it's useful to be able to print them from GDB.
294 We could create special functions for this, but it's simpler to
295 just allow passing stderr to the dump_foo fns. Since stderr can
296 be a macro, we store a copy here. */
297 static FILE *debug_stderr
;
299 /* An obstack for our working variables. */
300 static struct obstack gcse_obstack
;
302 /* Nonzero for each mode that supports (set (reg) (reg)).
303 This is trivially true for integer and floating point values.
304 It may or may not be true for condition codes. */
305 static char can_copy_p
[(int) NUM_MACHINE_MODES
];
307 /* Nonzero if can_copy_p has been initialized. */
308 static int can_copy_init_p
;
310 struct reg_use
{rtx reg_rtx
; };
312 /* Hash table of expressions. */
316 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
318 /* Index in the available expression bitmaps. */
320 /* Next entry with the same hash. */
321 struct expr
*next_same_hash
;
322 /* List of anticipatable occurrences in basic blocks in the function.
323 An "anticipatable occurrence" is one that is the first occurrence in the
324 basic block, the operands are not modified in the basic block prior
325 to the occurrence and the output is not used between the start of
326 the block and the occurrence. */
327 struct occr
*antic_occr
;
328 /* List of available occurrence in basic blocks in the function.
329 An "available occurrence" is one that is the last occurrence in the
330 basic block and the operands are not modified by following statements in
331 the basic block [including this insn]. */
332 struct occr
*avail_occr
;
333 /* Non-null if the computation is PRE redundant.
334 The value is the newly created pseudo-reg to record a copy of the
335 expression in all the places that reach the redundant copy. */
339 /* Occurrence of an expression.
340 There is one per basic block. If a pattern appears more than once the
341 last appearance is used [or first for anticipatable expressions]. */
345 /* Next occurrence of this expression. */
347 /* The insn that computes the expression. */
349 /* Nonzero if this [anticipatable] occurrence has been deleted. */
351 /* Nonzero if this [available] occurrence has been copied to
353 /* ??? This is mutually exclusive with deleted_p, so they could share
358 /* Expression and copy propagation hash tables.
359 Each hash table is an array of buckets.
360 ??? It is known that if it were an array of entries, structure elements
361 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
362 not clear whether in the final analysis a sufficient amount of memory would
363 be saved as the size of the available expression bitmaps would be larger
364 [one could build a mapping table without holes afterwards though].
365 Someday I'll perform the computation and figure it out. */
370 This is an array of `expr_hash_table_size' elements. */
373 /* Size of the hash table, in elements. */
376 /* Number of hash table elements. */
377 unsigned int n_elems
;
379 /* Whether the table is expression of copy propagation one. */
383 /* Expression hash table. */
384 static struct hash_table expr_hash_table
;
386 /* Copy propagation hash table. */
387 static struct hash_table set_hash_table
;
389 /* Mapping of uids to cuids.
390 Only real insns get cuids. */
391 static int *uid_cuid
;
393 /* Highest UID in UID_CUID. */
396 /* Get the cuid of an insn. */
397 #ifdef ENABLE_CHECKING
398 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
400 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
403 /* Number of cuids. */
406 /* Mapping of cuids to insns. */
407 static rtx
*cuid_insn
;
409 /* Get insn from cuid. */
410 #define CUID_INSN(CUID) (cuid_insn[CUID])
412 /* Maximum register number in function prior to doing gcse + 1.
413 Registers created during this pass have regno >= max_gcse_regno.
414 This is named with "gcse" to not collide with global of same name. */
415 static unsigned int max_gcse_regno
;
417 /* Table of registers that are modified.
419 For each register, each element is a list of places where the pseudo-reg
422 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
423 requires knowledge of which blocks kill which regs [and thus could use
424 a bitmap instead of the lists `reg_set_table' uses].
426 `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
427 num-regs) [however perhaps it may be useful to keep the data as is]. One
428 advantage of recording things this way is that `reg_set_table' is fairly
429 sparse with respect to pseudo regs but for hard regs could be fairly dense
430 [relatively speaking]. And recording sets of pseudo-regs in lists speeds
431 up functions like compute_transp since in the case of pseudo-regs we only
432 need to iterate over the number of times a pseudo-reg is set, not over the
433 number of basic blocks [clearly there is a bit of a slow down in the cases
434 where a pseudo is set more than once in a block, however it is believed
435 that the net effect is to speed things up]. This isn't done for hard-regs
436 because recording call-clobbered hard-regs in `reg_set_table' at each
437 function call can consume a fair bit of memory, and iterating over
438 hard-regs stored this way in compute_transp will be more expensive. */
440 typedef struct reg_set
442 /* The next setting of this register. */
443 struct reg_set
*next
;
444 /* The insn where it was set. */
448 static reg_set
**reg_set_table
;
450 /* Size of `reg_set_table'.
451 The table starts out at max_gcse_regno + slop, and is enlarged as
453 static int reg_set_table_size
;
455 /* Amount to grow `reg_set_table' by when it's full. */
456 #define REG_SET_TABLE_SLOP 100
458 /* This is a list of expressions which are MEMs and will be used by load
460 Load motion tracks MEMs which aren't killed by
461 anything except itself. (ie, loads and stores to a single location).
462 We can then allow movement of these MEM refs with a little special
463 allowance. (all stores copy the same value to the reaching reg used
464 for the loads). This means all values used to store into memory must have
465 no side effects so we can re-issue the setter value.
466 Store Motion uses this structure as an expression table to track stores
467 which look interesting, and might be moveable towards the exit block. */
471 struct expr
* expr
; /* Gcse expression reference for LM. */
472 rtx pattern
; /* Pattern of this mem. */
473 rtx loads
; /* INSN list of loads seen. */
474 rtx stores
; /* INSN list of stores seen. */
475 struct ls_expr
* next
; /* Next in the list. */
476 int invalid
; /* Invalid for some reason. */
477 int index
; /* If it maps to a bitmap index. */
478 int hash_index
; /* Index when in a hash table. */
479 rtx reaching_reg
; /* Register to use when re-writing. */
482 /* Array of implicit set patterns indexed by basic block index. */
483 static rtx
*implicit_sets
;
485 /* Head of the list of load/store memory refs. */
486 static struct ls_expr
* pre_ldst_mems
= NULL
;
488 /* Bitmap containing one bit for each register in the program.
489 Used when performing GCSE to track which registers have been set since
490 the start of the basic block. */
491 static regset reg_set_bitmap
;
493 /* For each block, a bitmap of registers set in the block.
494 This is used by expr_killed_p and compute_transp.
495 It is computed during hash table computation and not by compute_sets
496 as it includes registers added since the last pass (or between cprop and
497 gcse) and it's currently not easy to realloc sbitmap vectors. */
498 static sbitmap
*reg_set_in_block
;
500 /* Array, indexed by basic block number for a list of insns which modify
501 memory within that block. */
502 static rtx
* modify_mem_list
;
503 bitmap modify_mem_list_set
;
505 /* This array parallels modify_mem_list, but is kept canonicalized. */
506 static rtx
* canon_modify_mem_list
;
507 bitmap canon_modify_mem_list_set
;
508 /* Various variables for statistics gathering. */
510 /* Memory used in a pass.
511 This isn't intended to be absolutely precise. Its intent is only
512 to keep an eye on memory usage. */
513 static int bytes_used
;
515 /* GCSE substitutions made. */
516 static int gcse_subst_count
;
517 /* Number of copy instructions created. */
518 static int gcse_create_count
;
519 /* Number of constants propagated. */
520 static int const_prop_count
;
521 /* Number of copys propagated. */
522 static int copy_prop_count
;
524 /* These variables are used by classic GCSE.
525 Normally they'd be defined a bit later, but `rd_gen' needs to
526 be declared sooner. */
528 /* Each block has a bitmap of each type.
529 The length of each blocks bitmap is:
531 max_cuid - for reaching definitions
532 n_exprs - for available expressions
534 Thus we view the bitmaps as 2 dimensional arrays. i.e.
535 rd_kill[block_num][cuid_num]
536 ae_kill[block_num][expr_num] */
538 /* For reaching defs */
539 static sbitmap
*rd_kill
, *rd_gen
, *reaching_defs
, *rd_out
;
541 /* for available exprs */
542 static sbitmap
*ae_kill
, *ae_gen
, *ae_in
, *ae_out
;
544 /* Objects of this type are passed around by the null-pointer check
546 struct null_pointer_info
548 /* The basic block being processed. */
549 basic_block current_block
;
550 /* The first register to be handled in this pass. */
551 unsigned int min_reg
;
552 /* One greater than the last register to be handled in this pass. */
553 unsigned int max_reg
;
554 sbitmap
*nonnull_local
;
555 sbitmap
*nonnull_killed
;
558 static void compute_can_copy
PARAMS ((void));
559 static char *gmalloc
PARAMS ((unsigned int));
560 static char *grealloc
PARAMS ((char *, unsigned int));
561 static char *gcse_alloc
PARAMS ((unsigned long));
562 static void alloc_gcse_mem
PARAMS ((rtx
));
563 static void free_gcse_mem
PARAMS ((void));
564 static void alloc_reg_set_mem
PARAMS ((int));
565 static void free_reg_set_mem
PARAMS ((void));
566 static int get_bitmap_width
PARAMS ((int, int, int));
567 static void record_one_set
PARAMS ((int, rtx
));
568 static void record_set_info
PARAMS ((rtx
, rtx
, void *));
569 static void compute_sets
PARAMS ((rtx
));
570 static void hash_scan_insn
PARAMS ((rtx
, struct hash_table
*, int));
571 static void hash_scan_set
PARAMS ((rtx
, rtx
, struct hash_table
*));
572 static void hash_scan_clobber
PARAMS ((rtx
, rtx
, struct hash_table
*));
573 static void hash_scan_call
PARAMS ((rtx
, rtx
, struct hash_table
*));
574 static int want_to_gcse_p
PARAMS ((rtx
));
575 static int oprs_unchanged_p
PARAMS ((rtx
, rtx
, int));
576 static int oprs_anticipatable_p
PARAMS ((rtx
, rtx
));
577 static int oprs_available_p
PARAMS ((rtx
, rtx
));
578 static void insert_expr_in_table
PARAMS ((rtx
, enum machine_mode
, rtx
,
579 int, int, struct hash_table
*));
580 static void insert_set_in_table
PARAMS ((rtx
, rtx
, struct hash_table
*));
581 static unsigned int hash_expr
PARAMS ((rtx
, enum machine_mode
, int *, int));
582 static unsigned int hash_expr_1
PARAMS ((rtx
, enum machine_mode
, int *));
583 static unsigned int hash_string_1
PARAMS ((const char *));
584 static unsigned int hash_set
PARAMS ((int, int));
585 static int expr_equiv_p
PARAMS ((rtx
, rtx
));
586 static void record_last_reg_set_info
PARAMS ((rtx
, int));
587 static void record_last_mem_set_info
PARAMS ((rtx
));
588 static void record_last_set_info
PARAMS ((rtx
, rtx
, void *));
589 static void compute_hash_table
PARAMS ((struct hash_table
*));
590 static void alloc_hash_table
PARAMS ((int, struct hash_table
*, int));
591 static void free_hash_table
PARAMS ((struct hash_table
*));
592 static void compute_hash_table_work
PARAMS ((struct hash_table
*));
593 static void dump_hash_table
PARAMS ((FILE *, const char *,
594 struct hash_table
*));
595 static struct expr
*lookup_expr
PARAMS ((rtx
, struct hash_table
*));
596 static struct expr
*lookup_set
PARAMS ((unsigned int, rtx
, struct hash_table
*));
597 static struct expr
*next_set
PARAMS ((unsigned int, struct expr
*));
598 static void reset_opr_set_tables
PARAMS ((void));
599 static int oprs_not_set_p
PARAMS ((rtx
, rtx
));
600 static void mark_call
PARAMS ((rtx
));
601 static void mark_set
PARAMS ((rtx
, rtx
));
602 static void mark_clobber
PARAMS ((rtx
, rtx
));
603 static void mark_oprs_set
PARAMS ((rtx
));
604 static void alloc_cprop_mem
PARAMS ((int, int));
605 static void free_cprop_mem
PARAMS ((void));
606 static void compute_transp
PARAMS ((rtx
, int, sbitmap
*, int));
607 static void compute_transpout
PARAMS ((void));
608 static void compute_local_properties
PARAMS ((sbitmap
*, sbitmap
*, sbitmap
*,
609 struct hash_table
*));
610 static void compute_cprop_data
PARAMS ((void));
611 static void find_used_regs
PARAMS ((rtx
*, void *));
612 static int try_replace_reg
PARAMS ((rtx
, rtx
, rtx
));
613 static struct expr
*find_avail_set
PARAMS ((int, rtx
));
614 static int cprop_jump
PARAMS ((basic_block
, rtx
, rtx
, rtx
, rtx
));
615 static void mems_conflict_for_gcse_p
PARAMS ((rtx
, rtx
, void *));
616 static int load_killed_in_block_p
PARAMS ((basic_block
, int, rtx
, int));
617 static void canon_list_insert
PARAMS ((rtx
, rtx
, void *));
618 static int cprop_insn
PARAMS ((rtx
, int));
619 static int cprop
PARAMS ((int));
620 static rtx fis_get_condition
PARAMS ((rtx
));
621 static void find_implicit_sets
PARAMS ((void));
622 static int one_cprop_pass
PARAMS ((int, int, int));
623 static bool constprop_register
PARAMS ((rtx
, rtx
, rtx
, int));
624 static struct expr
*find_bypass_set
PARAMS ((int, int));
625 static int bypass_block
PARAMS ((basic_block
, rtx
, rtx
));
626 static int bypass_conditional_jumps
PARAMS ((void));
627 static void alloc_pre_mem
PARAMS ((int, int));
628 static void free_pre_mem
PARAMS ((void));
629 static void compute_pre_data
PARAMS ((void));
630 static int pre_expr_reaches_here_p
PARAMS ((basic_block
, struct expr
*,
632 static void insert_insn_end_bb
PARAMS ((struct expr
*, basic_block
, int));
633 static void pre_insert_copy_insn
PARAMS ((struct expr
*, rtx
));
634 static void pre_insert_copies
PARAMS ((void));
635 static int pre_delete
PARAMS ((void));
636 static int pre_gcse
PARAMS ((void));
637 static int one_pre_gcse_pass
PARAMS ((int));
638 static void add_label_notes
PARAMS ((rtx
, rtx
));
639 static void alloc_code_hoist_mem
PARAMS ((int, int));
640 static void free_code_hoist_mem
PARAMS ((void));
641 static void compute_code_hoist_vbeinout
PARAMS ((void));
642 static void compute_code_hoist_data
PARAMS ((void));
643 static int hoist_expr_reaches_here_p
PARAMS ((basic_block
, int, basic_block
,
645 static void hoist_code
PARAMS ((void));
646 static int one_code_hoisting_pass
PARAMS ((void));
647 static void alloc_rd_mem
PARAMS ((int, int));
648 static void free_rd_mem
PARAMS ((void));
649 static void handle_rd_kill_set
PARAMS ((rtx
, int, basic_block
));
650 static void compute_kill_rd
PARAMS ((void));
651 static void compute_rd
PARAMS ((void));
652 static void alloc_avail_expr_mem
PARAMS ((int, int));
653 static void free_avail_expr_mem
PARAMS ((void));
654 static void compute_ae_gen
PARAMS ((struct hash_table
*));
655 static int expr_killed_p
PARAMS ((rtx
, basic_block
));
656 static void compute_ae_kill
PARAMS ((sbitmap
*, sbitmap
*, struct hash_table
*));
657 static int expr_reaches_here_p
PARAMS ((struct occr
*, struct expr
*,
659 static rtx computing_insn
PARAMS ((struct expr
*, rtx
));
660 static int def_reaches_here_p
PARAMS ((rtx
, rtx
));
661 static int can_disregard_other_sets
PARAMS ((struct reg_set
**, rtx
, int));
662 static int handle_avail_expr
PARAMS ((rtx
, struct expr
*));
663 static int classic_gcse
PARAMS ((void));
664 static int one_classic_gcse_pass
PARAMS ((int));
665 static void invalidate_nonnull_info
PARAMS ((rtx
, rtx
, void *));
666 static int delete_null_pointer_checks_1
PARAMS ((unsigned int *,
667 sbitmap
*, sbitmap
*,
668 struct null_pointer_info
*));
669 static rtx process_insert_insn
PARAMS ((struct expr
*));
670 static int pre_edge_insert
PARAMS ((struct edge_list
*, struct expr
**));
671 static int expr_reaches_here_p_work
PARAMS ((struct occr
*, struct expr
*,
672 basic_block
, int, char *));
673 static int pre_expr_reaches_here_p_work
PARAMS ((basic_block
, struct expr
*,
674 basic_block
, char *));
675 static struct ls_expr
* ldst_entry
PARAMS ((rtx
));
676 static void free_ldst_entry
PARAMS ((struct ls_expr
*));
677 static void free_ldst_mems
PARAMS ((void));
678 static void print_ldst_list
PARAMS ((FILE *));
679 static struct ls_expr
* find_rtx_in_ldst
PARAMS ((rtx
));
680 static int enumerate_ldsts
PARAMS ((void));
681 static inline struct ls_expr
* first_ls_expr
PARAMS ((void));
682 static inline struct ls_expr
* next_ls_expr
PARAMS ((struct ls_expr
*));
683 static int simple_mem
PARAMS ((rtx
));
684 static void invalidate_any_buried_refs
PARAMS ((rtx
));
685 static void compute_ld_motion_mems
PARAMS ((void));
686 static void trim_ld_motion_mems
PARAMS ((void));
687 static void update_ld_motion_stores
PARAMS ((struct expr
*));
688 static void reg_set_info
PARAMS ((rtx
, rtx
, void *));
689 static int store_ops_ok
PARAMS ((rtx
, basic_block
));
690 static void find_moveable_store
PARAMS ((rtx
));
691 static int compute_store_table
PARAMS ((void));
692 static int load_kills_store
PARAMS ((rtx
, rtx
));
693 static int find_loads
PARAMS ((rtx
, rtx
));
694 static int store_killed_in_insn
PARAMS ((rtx
, rtx
));
695 static int store_killed_after
PARAMS ((rtx
, rtx
, basic_block
));
696 static int store_killed_before
PARAMS ((rtx
, rtx
, basic_block
));
697 static void build_store_vectors
PARAMS ((void));
698 static void insert_insn_start_bb
PARAMS ((rtx
, basic_block
));
699 static int insert_store
PARAMS ((struct ls_expr
*, edge
));
700 static void replace_store_insn
PARAMS ((rtx
, rtx
, basic_block
));
701 static void delete_store
PARAMS ((struct ls_expr
*,
703 static void free_store_memory
PARAMS ((void));
704 static void store_motion
PARAMS ((void));
705 static void free_insn_expr_list_list
PARAMS ((rtx
*));
706 static void clear_modify_mem_tables
PARAMS ((void));
707 static void free_modify_mem_tables
PARAMS ((void));
708 static rtx gcse_emit_move_after
PARAMS ((rtx
, rtx
, rtx
));
709 static void local_cprop_find_used_regs
PARAMS ((rtx
*, void *));
710 static bool do_local_cprop
PARAMS ((rtx
, rtx
, int, rtx
*));
711 static bool adjust_libcall_notes
PARAMS ((rtx
, rtx
, rtx
, rtx
*));
712 static void local_cprop_pass
PARAMS ((int));
714 /* Entry point for global common subexpression elimination.
715 F is the first instruction in the function. */
723 /* Bytes used at start of pass. */
724 int initial_bytes_used
;
725 /* Maximum number of bytes used by a pass. */
727 /* Point to release obstack data from for each pass. */
728 char *gcse_obstack_bottom
;
730 /* We do not construct an accurate cfg in functions which call
731 setjmp, so just punt to be safe. */
732 if (current_function_calls_setjmp
)
735 /* Assume that we do not need to run jump optimizations after gcse. */
736 run_jump_opt_after_gcse
= 0;
738 /* For calling dump_foo fns from gdb. */
739 debug_stderr
= stderr
;
742 /* Identify the basic block information for this function, including
743 successors and predecessors. */
744 max_gcse_regno
= max_reg_num ();
747 dump_flow_info (file
);
749 /* Return if there's nothing to do. */
750 if (n_basic_blocks
<= 1)
753 /* Trying to perform global optimizations on flow graphs which have
754 a high connectivity will take a long time and is unlikely to be
757 In normal circumstances a cfg should have about twice as many edges
758 as blocks. But we do not want to punish small functions which have
759 a couple switch statements. So we require a relatively large number
760 of basic blocks and the ratio of edges to blocks to be high. */
761 if (n_basic_blocks
> 1000 && n_edges
/ n_basic_blocks
>= 20)
763 if (warn_disabled_optimization
)
764 warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
765 n_basic_blocks
, n_edges
/ n_basic_blocks
);
769 /* If allocating memory for the cprop bitmap would take up too much
770 storage it's better just to disable the optimization. */
772 * SBITMAP_SET_SIZE (max_gcse_regno
)
773 * sizeof (SBITMAP_ELT_TYPE
)) > MAX_GCSE_MEMORY
)
775 if (warn_disabled_optimization
)
776 warning ("GCSE disabled: %d basic blocks and %d registers",
777 n_basic_blocks
, max_gcse_regno
);
782 /* See what modes support reg/reg copy operations. */
783 if (! can_copy_init_p
)
789 gcc_obstack_init (&gcse_obstack
);
793 init_alias_analysis ();
794 /* Record where pseudo-registers are set. This data is kept accurate
795 during each pass. ??? We could also record hard-reg information here
796 [since it's unchanging], however it is currently done during hash table
799 It may be tempting to compute MEM set information here too, but MEM sets
800 will be subject to code motion one day and thus we need to compute
801 information about memory sets when we build the hash tables. */
803 alloc_reg_set_mem (max_gcse_regno
);
807 initial_bytes_used
= bytes_used
;
809 gcse_obstack_bottom
= gcse_alloc (1);
811 while (changed
&& pass
< MAX_GCSE_PASSES
)
815 fprintf (file
, "GCSE pass %d\n\n", pass
+ 1);
817 /* Initialize bytes_used to the space for the pred/succ lists,
818 and the reg_set_table data. */
819 bytes_used
= initial_bytes_used
;
821 /* Each pass may create new registers, so recalculate each time. */
822 max_gcse_regno
= max_reg_num ();
826 /* Don't allow constant propagation to modify jumps
828 changed
= one_cprop_pass (pass
+ 1, 0, 0);
831 changed
|= one_classic_gcse_pass (pass
+ 1);
834 changed
|= one_pre_gcse_pass (pass
+ 1);
835 /* We may have just created new basic blocks. Release and
836 recompute various things which are sized on the number of
840 free_modify_mem_tables ();
842 = (rtx
*) gmalloc (last_basic_block
* sizeof (rtx
));
843 canon_modify_mem_list
844 = (rtx
*) gmalloc (last_basic_block
* sizeof (rtx
));
845 memset ((char *) modify_mem_list
, 0, last_basic_block
* sizeof (rtx
));
846 memset ((char *) canon_modify_mem_list
, 0, last_basic_block
* sizeof (rtx
));
849 alloc_reg_set_mem (max_reg_num ());
851 run_jump_opt_after_gcse
= 1;
854 if (max_pass_bytes
< bytes_used
)
855 max_pass_bytes
= bytes_used
;
857 /* Free up memory, then reallocate for code hoisting. We can
858 not re-use the existing allocated memory because the tables
859 will not have info for the insns or registers created by
860 partial redundancy elimination. */
863 /* It does not make sense to run code hoisting unless we optimizing
864 for code size -- it rarely makes programs faster, and can make
865 them bigger if we did partial redundancy elimination (when optimizing
866 for space, we use a classic gcse algorithm instead of partial
867 redundancy algorithms). */
870 max_gcse_regno
= max_reg_num ();
872 changed
|= one_code_hoisting_pass ();
875 if (max_pass_bytes
< bytes_used
)
876 max_pass_bytes
= bytes_used
;
881 fprintf (file
, "\n");
885 obstack_free (&gcse_obstack
, gcse_obstack_bottom
);
889 /* Do one last pass of copy propagation, including cprop into
890 conditional jumps. */
892 max_gcse_regno
= max_reg_num ();
894 /* This time, go ahead and allow cprop to alter jumps. */
895 one_cprop_pass (pass
+ 1, 1, 0);
900 fprintf (file
, "GCSE of %s: %d basic blocks, ",
901 current_function_name
, n_basic_blocks
);
902 fprintf (file
, "%d pass%s, %d bytes\n\n",
903 pass
, pass
> 1 ? "es" : "", max_pass_bytes
);
906 obstack_free (&gcse_obstack
, NULL
);
908 /* We are finished with alias. */
909 end_alias_analysis ();
910 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
912 /* Store motion disabled until it is fixed. */
913 if (0 && !optimize_size
&& flag_gcse_sm
)
915 /* Record where pseudo-registers are set. */
916 return run_jump_opt_after_gcse
;
919 /* Misc. utilities. */
921 /* Compute which modes support reg/reg copy operations. */
927 #ifndef AVOID_CCMODE_COPIES
930 memset (can_copy_p
, 0, NUM_MACHINE_MODES
);
933 for (i
= 0; i
< NUM_MACHINE_MODES
; i
++)
934 if (GET_MODE_CLASS (i
) == MODE_CC
)
936 #ifdef AVOID_CCMODE_COPIES
939 reg
= gen_rtx_REG ((enum machine_mode
) i
, LAST_VIRTUAL_REGISTER
+ 1);
940 insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, reg
));
941 if (recog (PATTERN (insn
), insn
, NULL
) >= 0)
951 /* Cover function to xmalloc to record bytes allocated. */
958 return xmalloc (size
);
961 /* Cover function to xrealloc.
962 We don't record the additional size since we don't know it.
963 It won't affect memory usage stats much anyway. */
970 return xrealloc (ptr
, size
);
973 /* Cover function to obstack_alloc. */
980 return (char *) obstack_alloc (&gcse_obstack
, size
);
983 /* Allocate memory for the cuid mapping array,
984 and reg/memory set tracking tables.
986 This is called at the start of each pass. */
995 /* Find the largest UID and create a mapping from UIDs to CUIDs.
996 CUIDs are like UIDs except they increase monotonically, have no gaps,
997 and only apply to real insns. */
999 max_uid
= get_max_uid ();
1000 n
= (max_uid
+ 1) * sizeof (int);
1001 uid_cuid
= (int *) gmalloc (n
);
1002 memset ((char *) uid_cuid
, 0, n
);
1003 for (insn
= f
, i
= 0; insn
; insn
= NEXT_INSN (insn
))
1006 uid_cuid
[INSN_UID (insn
)] = i
++;
1008 uid_cuid
[INSN_UID (insn
)] = i
;
1011 /* Create a table mapping cuids to insns. */
1014 n
= (max_cuid
+ 1) * sizeof (rtx
);
1015 cuid_insn
= (rtx
*) gmalloc (n
);
1016 memset ((char *) cuid_insn
, 0, n
);
1017 for (insn
= f
, i
= 0; insn
; insn
= NEXT_INSN (insn
))
1019 CUID_INSN (i
++) = insn
;
1021 /* Allocate vars to track sets of regs. */
1022 reg_set_bitmap
= BITMAP_XMALLOC ();
1024 /* Allocate vars to track sets of regs, memory per block. */
1025 reg_set_in_block
= (sbitmap
*) sbitmap_vector_alloc (last_basic_block
,
1027 /* Allocate array to keep a list of insns which modify memory in each
1029 modify_mem_list
= (rtx
*) gmalloc (last_basic_block
* sizeof (rtx
));
1030 canon_modify_mem_list
= (rtx
*) gmalloc (last_basic_block
* sizeof (rtx
));
1031 memset ((char *) modify_mem_list
, 0, last_basic_block
* sizeof (rtx
));
1032 memset ((char *) canon_modify_mem_list
, 0, last_basic_block
* sizeof (rtx
));
1033 modify_mem_list_set
= BITMAP_XMALLOC ();
1034 canon_modify_mem_list_set
= BITMAP_XMALLOC ();
1037 /* Free memory allocated by alloc_gcse_mem. */
1045 BITMAP_XFREE (reg_set_bitmap
);
1047 sbitmap_vector_free (reg_set_in_block
);
1048 free_modify_mem_tables ();
1049 BITMAP_XFREE (modify_mem_list_set
);
1050 BITMAP_XFREE (canon_modify_mem_list_set
);
1053 /* Many of the global optimization algorithms work by solving dataflow
1054 equations for various expressions. Initially, some local value is
1055 computed for each expression in each block. Then, the values across the
1056 various blocks are combined (by following flow graph edges) to arrive at
1057 global values. Conceptually, each set of equations is independent. We
1058 may therefore solve all the equations in parallel, solve them one at a
1059 time, or pick any intermediate approach.
1061 When you're going to need N two-dimensional bitmaps, each X (say, the
1062 number of blocks) by Y (say, the number of expressions), call this
1063 function. It's not important what X and Y represent; only that Y
1064 correspond to the things that can be done in parallel. This function will
1065 return an appropriate chunking factor C; you should solve C sets of
1066 equations in parallel. By going through this function, we can easily
1067 trade space against time; by solving fewer equations in parallel we use
1071 get_bitmap_width (n
, x
, y
)
1076 /* It's not really worth figuring out *exactly* how much memory will
1077 be used by a particular choice. The important thing is to get
1078 something approximately right. */
1079 size_t max_bitmap_memory
= 10 * 1024 * 1024;
1081 /* The number of bytes we'd use for a single column of minimum
1083 size_t column_size
= n
* x
* sizeof (SBITMAP_ELT_TYPE
);
1085 /* Often, it's reasonable just to solve all the equations in
1087 if (column_size
* SBITMAP_SET_SIZE (y
) <= max_bitmap_memory
)
1090 /* Otherwise, pick the largest width we can, without going over the
1092 return SBITMAP_ELT_BITS
* ((max_bitmap_memory
+ column_size
- 1)
1096 /* Compute the local properties of each recorded expression.
1098 Local properties are those that are defined by the block, irrespective of
1101 An expression is transparent in a block if its operands are not modified
1104 An expression is computed (locally available) in a block if it is computed
1105 at least once and expression would contain the same value if the
1106 computation was moved to the end of the block.
1108 An expression is locally anticipatable in a block if it is computed at
1109 least once and expression would contain the same value if the computation
1110 was moved to the beginning of the block.
1112 We call this routine for cprop, pre and code hoisting. They all compute
1113 basically the same information and thus can easily share this code.
1115 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1116 properties. If NULL, then it is not necessary to compute or record that
1117 particular property.
1119 TABLE controls which hash table to look at. If it is set hash table,
1120 additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1124 compute_local_properties (transp
, comp
, antloc
, table
)
1128 struct hash_table
*table
;
1132 /* Initialize any bitmaps that were passed in. */
1136 sbitmap_vector_zero (transp
, last_basic_block
);
1138 sbitmap_vector_ones (transp
, last_basic_block
);
1142 sbitmap_vector_zero (comp
, last_basic_block
);
1144 sbitmap_vector_zero (antloc
, last_basic_block
);
1146 for (i
= 0; i
< table
->size
; i
++)
1150 for (expr
= table
->table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
1152 int indx
= expr
->bitmap_index
;
1155 /* The expression is transparent in this block if it is not killed.
1156 We start by assuming all are transparent [none are killed], and
1157 then reset the bits for those that are. */
1159 compute_transp (expr
->expr
, indx
, transp
, table
->set_p
);
1161 /* The occurrences recorded in antic_occr are exactly those that
1162 we want to set to nonzero in ANTLOC. */
1164 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
1166 SET_BIT (antloc
[BLOCK_NUM (occr
->insn
)], indx
);
1168 /* While we're scanning the table, this is a good place to
1170 occr
->deleted_p
= 0;
1173 /* The occurrences recorded in avail_occr are exactly those that
1174 we want to set to nonzero in COMP. */
1176 for (occr
= expr
->avail_occr
; occr
!= NULL
; occr
= occr
->next
)
1178 SET_BIT (comp
[BLOCK_NUM (occr
->insn
)], indx
);
1180 /* While we're scanning the table, this is a good place to
1185 /* While we're scanning the table, this is a good place to
1187 expr
->reaching_reg
= 0;
1192 /* Register set information.
1194 `reg_set_table' records where each register is set or otherwise
1197 static struct obstack reg_set_obstack
;
1200 alloc_reg_set_mem (n_regs
)
1205 reg_set_table_size
= n_regs
+ REG_SET_TABLE_SLOP
;
1206 n
= reg_set_table_size
* sizeof (struct reg_set
*);
1207 reg_set_table
= (struct reg_set
**) gmalloc (n
);
1208 memset ((char *) reg_set_table
, 0, n
);
1210 gcc_obstack_init (®_set_obstack
);
1216 free (reg_set_table
);
1217 obstack_free (®_set_obstack
, NULL
);
1220 /* Record REGNO in the reg_set table. */
1223 record_one_set (regno
, insn
)
1227 /* Allocate a new reg_set element and link it onto the list. */
1228 struct reg_set
*new_reg_info
;
1230 /* If the table isn't big enough, enlarge it. */
1231 if (regno
>= reg_set_table_size
)
1233 int new_size
= regno
+ REG_SET_TABLE_SLOP
;
1236 = (struct reg_set
**) grealloc ((char *) reg_set_table
,
1237 new_size
* sizeof (struct reg_set
*));
1238 memset ((char *) (reg_set_table
+ reg_set_table_size
), 0,
1239 (new_size
- reg_set_table_size
) * sizeof (struct reg_set
*));
1240 reg_set_table_size
= new_size
;
1243 new_reg_info
= (struct reg_set
*) obstack_alloc (®_set_obstack
,
1244 sizeof (struct reg_set
));
1245 bytes_used
+= sizeof (struct reg_set
);
1246 new_reg_info
->insn
= insn
;
1247 new_reg_info
->next
= reg_set_table
[regno
];
1248 reg_set_table
[regno
] = new_reg_info
;
1251 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1252 an insn. The DATA is really the instruction in which the SET is
1256 record_set_info (dest
, setter
, data
)
1257 rtx dest
, setter ATTRIBUTE_UNUSED
;
1260 rtx record_set_insn
= (rtx
) data
;
1262 if (GET_CODE (dest
) == REG
&& REGNO (dest
) >= FIRST_PSEUDO_REGISTER
)
1263 record_one_set (REGNO (dest
), record_set_insn
);
1266 /* Scan the function and record each set of each pseudo-register.
1268 This is called once, at the start of the gcse pass. See the comments for
1269 `reg_set_table' for further documentation. */
1277 for (insn
= f
; insn
!= 0; insn
= NEXT_INSN (insn
))
1279 note_stores (PATTERN (insn
), record_set_info
, insn
);
1282 /* Hash table support. */
1284 struct reg_avail_info
1286 basic_block last_bb
;
1291 static struct reg_avail_info
*reg_avail_info
;
1292 static basic_block current_bb
;
1295 /* See whether X, the source of a set, is something we want to consider for
1298 static GTY(()) rtx test_insn
;
1303 int num_clobbers
= 0;
1306 switch (GET_CODE (x
))
1314 case CONSTANT_P_RTX
:
1321 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
1322 if (general_operand (x
, GET_MODE (x
)))
1324 else if (GET_MODE (x
) == VOIDmode
)
1327 /* Otherwise, check if we can make a valid insn from it. First initialize
1328 our test insn if we haven't already. */
1332 = make_insn_raw (gen_rtx_SET (VOIDmode
,
1333 gen_rtx_REG (word_mode
,
1334 FIRST_PSEUDO_REGISTER
* 2),
1336 NEXT_INSN (test_insn
) = PREV_INSN (test_insn
) = 0;
1339 /* Now make an insn like the one we would make when GCSE'ing and see if
1341 PUT_MODE (SET_DEST (PATTERN (test_insn
)), GET_MODE (x
));
1342 SET_SRC (PATTERN (test_insn
)) = x
;
1343 return ((icode
= recog (PATTERN (test_insn
), test_insn
, &num_clobbers
)) >= 0
1344 && (num_clobbers
== 0 || ! added_clobbers_hard_reg_p (icode
)));
1347 /* Return nonzero if the operands of expression X are unchanged from the
1348 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1349 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1352 oprs_unchanged_p (x
, insn
, avail_p
)
1363 code
= GET_CODE (x
);
1368 struct reg_avail_info
*info
= ®_avail_info
[REGNO (x
)];
1370 if (info
->last_bb
!= current_bb
)
1373 return info
->last_set
< INSN_CUID (insn
);
1375 return info
->first_set
>= INSN_CUID (insn
);
1379 if (load_killed_in_block_p (current_bb
, INSN_CUID (insn
),
1383 return oprs_unchanged_p (XEXP (x
, 0), insn
, avail_p
);
1409 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
1413 /* If we are about to do the last recursive call needed at this
1414 level, change it into iteration. This function is called enough
1417 return oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
);
1419 else if (! oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
))
1422 else if (fmt
[i
] == 'E')
1423 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1424 if (! oprs_unchanged_p (XVECEXP (x
, i
, j
), insn
, avail_p
))
1431 /* Used for communication between mems_conflict_for_gcse_p and
1432 load_killed_in_block_p. Nonzero if mems_conflict_for_gcse_p finds a
1433 conflict between two memory references. */
1434 static int gcse_mems_conflict_p
;
1436 /* Used for communication between mems_conflict_for_gcse_p and
1437 load_killed_in_block_p. A memory reference for a load instruction,
1438 mems_conflict_for_gcse_p will see if a memory store conflicts with
1439 this memory load. */
1440 static rtx gcse_mem_operand
;
1442 /* DEST is the output of an instruction. If it is a memory reference, and
1443 possibly conflicts with the load found in gcse_mem_operand, then set
1444 gcse_mems_conflict_p to a nonzero value. */
1447 mems_conflict_for_gcse_p (dest
, setter
, data
)
1448 rtx dest
, setter ATTRIBUTE_UNUSED
;
1449 void *data ATTRIBUTE_UNUSED
;
1451 while (GET_CODE (dest
) == SUBREG
1452 || GET_CODE (dest
) == ZERO_EXTRACT
1453 || GET_CODE (dest
) == SIGN_EXTRACT
1454 || GET_CODE (dest
) == STRICT_LOW_PART
)
1455 dest
= XEXP (dest
, 0);
1457 /* If DEST is not a MEM, then it will not conflict with the load. Note
1458 that function calls are assumed to clobber memory, but are handled
1460 if (GET_CODE (dest
) != MEM
)
1463 /* If we are setting a MEM in our list of specially recognized MEMs,
1464 don't mark as killed this time. */
1466 if (dest
== gcse_mem_operand
&& pre_ldst_mems
!= NULL
)
1468 if (!find_rtx_in_ldst (dest
))
1469 gcse_mems_conflict_p
= 1;
1473 if (true_dependence (dest
, GET_MODE (dest
), gcse_mem_operand
,
1475 gcse_mems_conflict_p
= 1;
1478 /* Return nonzero if the expression in X (a memory reference) is killed
1479 in block BB before or after the insn with the CUID in UID_LIMIT.
1480 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
1483 To check the entire block, set UID_LIMIT to max_uid + 1 and
1487 load_killed_in_block_p (bb
, uid_limit
, x
, avail_p
)
1493 rtx list_entry
= modify_mem_list
[bb
->index
];
1497 /* Ignore entries in the list that do not apply. */
1499 && INSN_CUID (XEXP (list_entry
, 0)) < uid_limit
)
1501 && INSN_CUID (XEXP (list_entry
, 0)) > uid_limit
))
1503 list_entry
= XEXP (list_entry
, 1);
1507 setter
= XEXP (list_entry
, 0);
1509 /* If SETTER is a call everything is clobbered. Note that calls
1510 to pure functions are never put on the list, so we need not
1511 worry about them. */
1512 if (GET_CODE (setter
) == CALL_INSN
)
1515 /* SETTER must be an INSN of some kind that sets memory. Call
1516 note_stores to examine each hunk of memory that is modified.
1518 The note_stores interface is pretty limited, so we have to
1519 communicate via global variables. Yuk. */
1520 gcse_mem_operand
= x
;
1521 gcse_mems_conflict_p
= 0;
1522 note_stores (PATTERN (setter
), mems_conflict_for_gcse_p
, NULL
);
1523 if (gcse_mems_conflict_p
)
1525 list_entry
= XEXP (list_entry
, 1);
1530 /* Return nonzero if the operands of expression X are unchanged from
1531 the start of INSN's basic block up to but not including INSN. */
1534 oprs_anticipatable_p (x
, insn
)
1537 return oprs_unchanged_p (x
, insn
, 0);
1540 /* Return nonzero if the operands of expression X are unchanged from
1541 INSN to the end of INSN's basic block. */
1544 oprs_available_p (x
, insn
)
1547 return oprs_unchanged_p (x
, insn
, 1);
1550 /* Hash expression X.
1552 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1553 indicating if a volatile operand is found or if the expression contains
1554 something we don't want to insert in the table.
1556 ??? One might want to merge this with canon_hash. Later. */
1559 hash_expr (x
, mode
, do_not_record_p
, hash_table_size
)
1561 enum machine_mode mode
;
1562 int *do_not_record_p
;
1563 int hash_table_size
;
1567 *do_not_record_p
= 0;
1569 hash
= hash_expr_1 (x
, mode
, do_not_record_p
);
1570 return hash
% hash_table_size
;
1573 /* Hash a string. Just add its bytes up. */
1575 static inline unsigned
1580 const unsigned char *p
= (const unsigned char *) ps
;
1589 /* Subroutine of hash_expr to do the actual work. */
1592 hash_expr_1 (x
, mode
, do_not_record_p
)
1594 enum machine_mode mode
;
1595 int *do_not_record_p
;
1602 /* Used to turn recursion into iteration. We can't rely on GCC's
1603 tail-recursion elimination since we need to keep accumulating values
1610 code
= GET_CODE (x
);
1614 hash
+= ((unsigned int) REG
<< 7) + REGNO (x
);
1618 hash
+= (((unsigned int) CONST_INT
<< 7) + (unsigned int) mode
1619 + (unsigned int) INTVAL (x
));
1623 /* This is like the general case, except that it only counts
1624 the integers representing the constant. */
1625 hash
+= (unsigned int) code
+ (unsigned int) GET_MODE (x
);
1626 if (GET_MODE (x
) != VOIDmode
)
1627 for (i
= 2; i
< GET_RTX_LENGTH (CONST_DOUBLE
); i
++)
1628 hash
+= (unsigned int) XWINT (x
, i
);
1630 hash
+= ((unsigned int) CONST_DOUBLE_LOW (x
)
1631 + (unsigned int) CONST_DOUBLE_HIGH (x
));
1639 units
= CONST_VECTOR_NUNITS (x
);
1641 for (i
= 0; i
< units
; ++i
)
1643 elt
= CONST_VECTOR_ELT (x
, i
);
1644 hash
+= hash_expr_1 (elt
, GET_MODE (elt
), do_not_record_p
);
1650 /* Assume there is only one rtx object for any given label. */
1652 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1653 differences and differences between each stage's debugging dumps. */
1654 hash
+= (((unsigned int) LABEL_REF
<< 7)
1655 + CODE_LABEL_NUMBER (XEXP (x
, 0)));
1660 /* Don't hash on the symbol's address to avoid bootstrap differences.
1661 Different hash values may cause expressions to be recorded in
1662 different orders and thus different registers to be used in the
1663 final assembler. This also avoids differences in the dump files
1664 between various stages. */
1666 const unsigned char *p
= (const unsigned char *) XSTR (x
, 0);
1669 h
+= (h
<< 7) + *p
++; /* ??? revisit */
1671 hash
+= ((unsigned int) SYMBOL_REF
<< 7) + h
;
1676 if (MEM_VOLATILE_P (x
))
1678 *do_not_record_p
= 1;
1682 hash
+= (unsigned int) MEM
;
1683 /* We used alias set for hashing, but this is not good, since the alias
1684 set may differ in -fprofile-arcs and -fbranch-probabilities compilation
1685 causing the profiles to fail to match. */
1696 case UNSPEC_VOLATILE
:
1697 *do_not_record_p
= 1;
1701 if (MEM_VOLATILE_P (x
))
1703 *do_not_record_p
= 1;
1708 /* We don't want to take the filename and line into account. */
1709 hash
+= (unsigned) code
+ (unsigned) GET_MODE (x
)
1710 + hash_string_1 (ASM_OPERANDS_TEMPLATE (x
))
1711 + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x
))
1712 + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x
);
1714 if (ASM_OPERANDS_INPUT_LENGTH (x
))
1716 for (i
= 1; i
< ASM_OPERANDS_INPUT_LENGTH (x
); i
++)
1718 hash
+= (hash_expr_1 (ASM_OPERANDS_INPUT (x
, i
),
1719 GET_MODE (ASM_OPERANDS_INPUT (x
, i
)),
1721 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
1725 hash
+= hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x
, 0));
1726 x
= ASM_OPERANDS_INPUT (x
, 0);
1727 mode
= GET_MODE (x
);
1737 hash
+= (unsigned) code
+ (unsigned) GET_MODE (x
);
1738 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
1742 /* If we are about to do the last recursive call
1743 needed at this level, change it into iteration.
1744 This function is called enough to be worth it. */
1751 hash
+= hash_expr_1 (XEXP (x
, i
), 0, do_not_record_p
);
1752 if (*do_not_record_p
)
1756 else if (fmt
[i
] == 'E')
1757 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1759 hash
+= hash_expr_1 (XVECEXP (x
, i
, j
), 0, do_not_record_p
);
1760 if (*do_not_record_p
)
1764 else if (fmt
[i
] == 's')
1765 hash
+= hash_string_1 (XSTR (x
, i
));
1766 else if (fmt
[i
] == 'i')
1767 hash
+= (unsigned int) XINT (x
, i
);
1775 /* Hash a set of register REGNO.
1777 Sets are hashed on the register that is set. This simplifies the PRE copy
1780 ??? May need to make things more elaborate. Later, as necessary. */
1783 hash_set (regno
, hash_table_size
)
1785 int hash_table_size
;
1790 return hash
% hash_table_size
;
1793 /* Return nonzero if exp1 is equivalent to exp2.
1794 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1807 if (x
== 0 || y
== 0)
1810 code
= GET_CODE (x
);
1811 if (code
!= GET_CODE (y
))
1814 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1815 if (GET_MODE (x
) != GET_MODE (y
))
1825 return INTVAL (x
) == INTVAL (y
);
1828 return XEXP (x
, 0) == XEXP (y
, 0);
1831 return XSTR (x
, 0) == XSTR (y
, 0);
1834 return REGNO (x
) == REGNO (y
);
1837 /* Can't merge two expressions in different alias sets, since we can
1838 decide that the expression is transparent in a block when it isn't,
1839 due to it being set with the different alias set. */
1840 if (MEM_ALIAS_SET (x
) != MEM_ALIAS_SET (y
))
1844 /* For commutative operations, check both orders. */
1852 return ((expr_equiv_p (XEXP (x
, 0), XEXP (y
, 0))
1853 && expr_equiv_p (XEXP (x
, 1), XEXP (y
, 1)))
1854 || (expr_equiv_p (XEXP (x
, 0), XEXP (y
, 1))
1855 && expr_equiv_p (XEXP (x
, 1), XEXP (y
, 0))));
1858 /* We don't use the generic code below because we want to
1859 disregard filename and line numbers. */
1861 /* A volatile asm isn't equivalent to any other. */
1862 if (MEM_VOLATILE_P (x
) || MEM_VOLATILE_P (y
))
1865 if (GET_MODE (x
) != GET_MODE (y
)
1866 || strcmp (ASM_OPERANDS_TEMPLATE (x
), ASM_OPERANDS_TEMPLATE (y
))
1867 || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x
),
1868 ASM_OPERANDS_OUTPUT_CONSTRAINT (y
))
1869 || ASM_OPERANDS_OUTPUT_IDX (x
) != ASM_OPERANDS_OUTPUT_IDX (y
)
1870 || ASM_OPERANDS_INPUT_LENGTH (x
) != ASM_OPERANDS_INPUT_LENGTH (y
))
1873 if (ASM_OPERANDS_INPUT_LENGTH (x
))
1875 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; i
--)
1876 if (! expr_equiv_p (ASM_OPERANDS_INPUT (x
, i
),
1877 ASM_OPERANDS_INPUT (y
, i
))
1878 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x
, i
),
1879 ASM_OPERANDS_INPUT_CONSTRAINT (y
, i
)))
1889 /* Compare the elements. If any pair of corresponding elements
1890 fail to match, return 0 for the whole thing. */
1892 fmt
= GET_RTX_FORMAT (code
);
1893 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
1898 if (! expr_equiv_p (XEXP (x
, i
), XEXP (y
, i
)))
1903 if (XVECLEN (x
, i
) != XVECLEN (y
, i
))
1905 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1906 if (! expr_equiv_p (XVECEXP (x
, i
, j
), XVECEXP (y
, i
, j
)))
1911 if (strcmp (XSTR (x
, i
), XSTR (y
, i
)))
1916 if (XINT (x
, i
) != XINT (y
, i
))
1921 if (XWINT (x
, i
) != XWINT (y
, i
))
1936 /* Insert expression X in INSN in the hash TABLE.
1937 If it is already present, record it as the last occurrence in INSN's
1940 MODE is the mode of the value X is being stored into.
1941 It is only used if X is a CONST_INT.
1943 ANTIC_P is nonzero if X is an anticipatable expression.
1944 AVAIL_P is nonzero if X is an available expression. */
1947 insert_expr_in_table (x
, mode
, insn
, antic_p
, avail_p
, table
)
1949 enum machine_mode mode
;
1951 int antic_p
, avail_p
;
1952 struct hash_table
*table
;
1954 int found
, do_not_record_p
;
1956 struct expr
*cur_expr
, *last_expr
= NULL
;
1957 struct occr
*antic_occr
, *avail_occr
;
1958 struct occr
*last_occr
= NULL
;
1960 hash
= hash_expr (x
, mode
, &do_not_record_p
, table
->size
);
1962 /* Do not insert expression in table if it contains volatile operands,
1963 or if hash_expr determines the expression is something we don't want
1964 to or can't handle. */
1965 if (do_not_record_p
)
1968 cur_expr
= table
->table
[hash
];
1971 while (cur_expr
&& 0 == (found
= expr_equiv_p (cur_expr
->expr
, x
)))
1973 /* If the expression isn't found, save a pointer to the end of
1975 last_expr
= cur_expr
;
1976 cur_expr
= cur_expr
->next_same_hash
;
1981 cur_expr
= (struct expr
*) gcse_alloc (sizeof (struct expr
));
1982 bytes_used
+= sizeof (struct expr
);
1983 if (table
->table
[hash
] == NULL
)
1984 /* This is the first pattern that hashed to this index. */
1985 table
->table
[hash
] = cur_expr
;
1987 /* Add EXPR to end of this hash chain. */
1988 last_expr
->next_same_hash
= cur_expr
;
1990 /* Set the fields of the expr element. */
1992 cur_expr
->bitmap_index
= table
->n_elems
++;
1993 cur_expr
->next_same_hash
= NULL
;
1994 cur_expr
->antic_occr
= NULL
;
1995 cur_expr
->avail_occr
= NULL
;
1998 /* Now record the occurrence(s). */
2001 antic_occr
= cur_expr
->antic_occr
;
2003 /* Search for another occurrence in the same basic block. */
2004 while (antic_occr
&& BLOCK_NUM (antic_occr
->insn
) != BLOCK_NUM (insn
))
2006 /* If an occurrence isn't found, save a pointer to the end of
2008 last_occr
= antic_occr
;
2009 antic_occr
= antic_occr
->next
;
2013 /* Found another instance of the expression in the same basic block.
2014 Prefer the currently recorded one. We want the first one in the
2015 block and the block is scanned from start to end. */
2016 ; /* nothing to do */
2019 /* First occurrence of this expression in this basic block. */
2020 antic_occr
= (struct occr
*) gcse_alloc (sizeof (struct occr
));
2021 bytes_used
+= sizeof (struct occr
);
2022 /* First occurrence of this expression in any block? */
2023 if (cur_expr
->antic_occr
== NULL
)
2024 cur_expr
->antic_occr
= antic_occr
;
2026 last_occr
->next
= antic_occr
;
2028 antic_occr
->insn
= insn
;
2029 antic_occr
->next
= NULL
;
2035 avail_occr
= cur_expr
->avail_occr
;
2037 /* Search for another occurrence in the same basic block. */
2038 while (avail_occr
&& BLOCK_NUM (avail_occr
->insn
) != BLOCK_NUM (insn
))
2040 /* If an occurrence isn't found, save a pointer to the end of
2042 last_occr
= avail_occr
;
2043 avail_occr
= avail_occr
->next
;
2047 /* Found another instance of the expression in the same basic block.
2048 Prefer this occurrence to the currently recorded one. We want
2049 the last one in the block and the block is scanned from start
2051 avail_occr
->insn
= insn
;
2054 /* First occurrence of this expression in this basic block. */
2055 avail_occr
= (struct occr
*) gcse_alloc (sizeof (struct occr
));
2056 bytes_used
+= sizeof (struct occr
);
2058 /* First occurrence of this expression in any block? */
2059 if (cur_expr
->avail_occr
== NULL
)
2060 cur_expr
->avail_occr
= avail_occr
;
2062 last_occr
->next
= avail_occr
;
2064 avail_occr
->insn
= insn
;
2065 avail_occr
->next
= NULL
;
2070 /* Insert pattern X in INSN in the hash table.
2071 X is a SET of a reg to either another reg or a constant.
2072 If it is already present, record it as the last occurrence in INSN's
2076 insert_set_in_table (x
, insn
, table
)
2079 struct hash_table
*table
;
2083 struct expr
*cur_expr
, *last_expr
= NULL
;
2084 struct occr
*cur_occr
, *last_occr
= NULL
;
2086 if (GET_CODE (x
) != SET
2087 || GET_CODE (SET_DEST (x
)) != REG
)
2090 hash
= hash_set (REGNO (SET_DEST (x
)), table
->size
);
2092 cur_expr
= table
->table
[hash
];
2095 while (cur_expr
&& 0 == (found
= expr_equiv_p (cur_expr
->expr
, x
)))
2097 /* If the expression isn't found, save a pointer to the end of
2099 last_expr
= cur_expr
;
2100 cur_expr
= cur_expr
->next_same_hash
;
2105 cur_expr
= (struct expr
*) gcse_alloc (sizeof (struct expr
));
2106 bytes_used
+= sizeof (struct expr
);
2107 if (table
->table
[hash
] == NULL
)
2108 /* This is the first pattern that hashed to this index. */
2109 table
->table
[hash
] = cur_expr
;
2111 /* Add EXPR to end of this hash chain. */
2112 last_expr
->next_same_hash
= cur_expr
;
2114 /* Set the fields of the expr element.
2115 We must copy X because it can be modified when copy propagation is
2116 performed on its operands. */
2117 cur_expr
->expr
= copy_rtx (x
);
2118 cur_expr
->bitmap_index
= table
->n_elems
++;
2119 cur_expr
->next_same_hash
= NULL
;
2120 cur_expr
->antic_occr
= NULL
;
2121 cur_expr
->avail_occr
= NULL
;
2124 /* Now record the occurrence. */
2125 cur_occr
= cur_expr
->avail_occr
;
2127 /* Search for another occurrence in the same basic block. */
2128 while (cur_occr
&& BLOCK_NUM (cur_occr
->insn
) != BLOCK_NUM (insn
))
2130 /* If an occurrence isn't found, save a pointer to the end of
2132 last_occr
= cur_occr
;
2133 cur_occr
= cur_occr
->next
;
2137 /* Found another instance of the expression in the same basic block.
2138 Prefer this occurrence to the currently recorded one. We want the
2139 last one in the block and the block is scanned from start to end. */
2140 cur_occr
->insn
= insn
;
2143 /* First occurrence of this expression in this basic block. */
2144 cur_occr
= (struct occr
*) gcse_alloc (sizeof (struct occr
));
2145 bytes_used
+= sizeof (struct occr
);
2147 /* First occurrence of this expression in any block? */
2148 if (cur_expr
->avail_occr
== NULL
)
2149 cur_expr
->avail_occr
= cur_occr
;
2151 last_occr
->next
= cur_occr
;
2153 cur_occr
->insn
= insn
;
2154 cur_occr
->next
= NULL
;
2158 /* Scan pattern PAT of INSN and add an entry to the hash TABLE (set or
2162 hash_scan_set (pat
, insn
, table
)
2164 struct hash_table
*table
;
2166 rtx src
= SET_SRC (pat
);
2167 rtx dest
= SET_DEST (pat
);
2170 if (GET_CODE (src
) == CALL
)
2171 hash_scan_call (src
, insn
, table
);
2173 else if (GET_CODE (dest
) == REG
)
2175 unsigned int regno
= REGNO (dest
);
2178 /* If this is a single set and we are doing constant propagation,
2179 see if a REG_NOTE shows this equivalent to a constant. */
2180 if (table
->set_p
&& (note
= find_reg_equal_equiv_note (insn
)) != 0
2181 && CONSTANT_P (XEXP (note
, 0)))
2182 src
= XEXP (note
, 0), pat
= gen_rtx_SET (VOIDmode
, dest
, src
);
2184 /* Only record sets of pseudo-regs in the hash table. */
2186 && regno
>= FIRST_PSEUDO_REGISTER
2187 /* Don't GCSE something if we can't do a reg/reg copy. */
2188 && can_copy_p
[GET_MODE (dest
)]
2189 /* GCSE commonly inserts instruction after the insn. We can't
2190 do that easily for EH_REGION notes so disable GCSE on these
2192 && !find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
)
2193 /* Is SET_SRC something we want to gcse? */
2194 && want_to_gcse_p (src
)
2195 /* Don't CSE a nop. */
2196 && ! set_noop_p (pat
)
2197 /* Don't GCSE if it has attached REG_EQUIV note.
2198 At this point this only function parameters should have
2199 REG_EQUIV notes and if the argument slot is used somewhere
2200 explicitly, it means address of parameter has been taken,
2201 so we should not extend the lifetime of the pseudo. */
2202 && ((note
= find_reg_note (insn
, REG_EQUIV
, NULL_RTX
)) == 0
2203 || GET_CODE (XEXP (note
, 0)) != MEM
))
2205 /* An expression is not anticipatable if its operands are
2206 modified before this insn or if this is not the only SET in
2208 int antic_p
= oprs_anticipatable_p (src
, insn
) && single_set (insn
);
2209 /* An expression is not available if its operands are
2210 subsequently modified, including this insn. It's also not
2211 available if this is a branch, because we can't insert
2212 a set after the branch. */
2213 int avail_p
= (oprs_available_p (src
, insn
)
2214 && ! JUMP_P (insn
));
2216 insert_expr_in_table (src
, GET_MODE (dest
), insn
, antic_p
, avail_p
, table
);
2219 /* Record sets for constant/copy propagation. */
2220 else if (table
->set_p
2221 && regno
>= FIRST_PSEUDO_REGISTER
2222 && ((GET_CODE (src
) == REG
2223 && REGNO (src
) >= FIRST_PSEUDO_REGISTER
2224 && can_copy_p
[GET_MODE (dest
)]
2225 && REGNO (src
) != regno
)
2226 || (CONSTANT_P (src
)
2227 && GET_CODE (src
) != CONSTANT_P_RTX
))
2228 /* A copy is not available if its src or dest is subsequently
2229 modified. Here we want to search from INSN+1 on, but
2230 oprs_available_p searches from INSN on. */
2231 && (insn
== BLOCK_END (BLOCK_NUM (insn
))
2232 || ((tmp
= next_nonnote_insn (insn
)) != NULL_RTX
2233 && oprs_available_p (pat
, tmp
))))
2234 insert_set_in_table (pat
, insn
, table
);
2239 hash_scan_clobber (x
, insn
, table
)
2240 rtx x ATTRIBUTE_UNUSED
, insn ATTRIBUTE_UNUSED
;
2241 struct hash_table
*table ATTRIBUTE_UNUSED
;
2243 /* Currently nothing to do. */
2247 hash_scan_call (x
, insn
, table
)
2248 rtx x ATTRIBUTE_UNUSED
, insn ATTRIBUTE_UNUSED
;
2249 struct hash_table
*table ATTRIBUTE_UNUSED
;
2251 /* Currently nothing to do. */
2254 /* Process INSN and add hash table entries as appropriate.
2256 Only available expressions that set a single pseudo-reg are recorded.
2258 Single sets in a PARALLEL could be handled, but it's an extra complication
2259 that isn't dealt with right now. The trick is handling the CLOBBERs that
2260 are also in the PARALLEL. Later.
2262 If SET_P is nonzero, this is for the assignment hash table,
2263 otherwise it is for the expression hash table.
2264 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2265 not record any expressions. */
2268 hash_scan_insn (insn
, table
, in_libcall_block
)
2270 struct hash_table
*table
;
2271 int in_libcall_block
;
2273 rtx pat
= PATTERN (insn
);
2276 if (in_libcall_block
)
2279 /* Pick out the sets of INSN and for other forms of instructions record
2280 what's been modified. */
2282 if (GET_CODE (pat
) == SET
)
2283 hash_scan_set (pat
, insn
, table
);
2284 else if (GET_CODE (pat
) == PARALLEL
)
2285 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
2287 rtx x
= XVECEXP (pat
, 0, i
);
2289 if (GET_CODE (x
) == SET
)
2290 hash_scan_set (x
, insn
, table
);
2291 else if (GET_CODE (x
) == CLOBBER
)
2292 hash_scan_clobber (x
, insn
, table
);
2293 else if (GET_CODE (x
) == CALL
)
2294 hash_scan_call (x
, insn
, table
);
2297 else if (GET_CODE (pat
) == CLOBBER
)
2298 hash_scan_clobber (pat
, insn
, table
);
2299 else if (GET_CODE (pat
) == CALL
)
2300 hash_scan_call (pat
, insn
, table
);
2304 dump_hash_table (file
, name
, table
)
2307 struct hash_table
*table
;
2310 /* Flattened out table, so it's printed in proper order. */
2311 struct expr
**flat_table
;
2312 unsigned int *hash_val
;
2316 = (struct expr
**) xcalloc (table
->n_elems
, sizeof (struct expr
*));
2317 hash_val
= (unsigned int *) xmalloc (table
->n_elems
* sizeof (unsigned int));
2319 for (i
= 0; i
< (int) table
->size
; i
++)
2320 for (expr
= table
->table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
2322 flat_table
[expr
->bitmap_index
] = expr
;
2323 hash_val
[expr
->bitmap_index
] = i
;
2326 fprintf (file
, "%s hash table (%d buckets, %d entries)\n",
2327 name
, table
->size
, table
->n_elems
);
2329 for (i
= 0; i
< (int) table
->n_elems
; i
++)
2330 if (flat_table
[i
] != 0)
2332 expr
= flat_table
[i
];
2333 fprintf (file
, "Index %d (hash value %d)\n ",
2334 expr
->bitmap_index
, hash_val
[i
]);
2335 print_rtl (file
, expr
->expr
);
2336 fprintf (file
, "\n");
2339 fprintf (file
, "\n");
2345 /* Record register first/last/block set information for REGNO in INSN.
2347 first_set records the first place in the block where the register
2348 is set and is used to compute "anticipatability".
2350 last_set records the last place in the block where the register
2351 is set and is used to compute "availability".
2353 last_bb records the block for which first_set and last_set are
2354 valid, as a quick test to invalidate them.
2356 reg_set_in_block records whether the register is set in the block
2357 and is used to compute "transparency". */
2360 record_last_reg_set_info (insn
, regno
)
2364 struct reg_avail_info
*info
= ®_avail_info
[regno
];
2365 int cuid
= INSN_CUID (insn
);
2367 info
->last_set
= cuid
;
2368 if (info
->last_bb
!= current_bb
)
2370 info
->last_bb
= current_bb
;
2371 info
->first_set
= cuid
;
2372 SET_BIT (reg_set_in_block
[current_bb
->index
], regno
);
2377 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
2378 Note we store a pair of elements in the list, so they have to be
2379 taken off pairwise. */
2382 canon_list_insert (dest
, unused1
, v_insn
)
2383 rtx dest ATTRIBUTE_UNUSED
;
2384 rtx unused1 ATTRIBUTE_UNUSED
;
2387 rtx dest_addr
, insn
;
2390 while (GET_CODE (dest
) == SUBREG
2391 || GET_CODE (dest
) == ZERO_EXTRACT
2392 || GET_CODE (dest
) == SIGN_EXTRACT
2393 || GET_CODE (dest
) == STRICT_LOW_PART
)
2394 dest
= XEXP (dest
, 0);
2396 /* If DEST is not a MEM, then it will not conflict with a load. Note
2397 that function calls are assumed to clobber memory, but are handled
2400 if (GET_CODE (dest
) != MEM
)
2403 dest_addr
= get_addr (XEXP (dest
, 0));
2404 dest_addr
= canon_rtx (dest_addr
);
2405 insn
= (rtx
) v_insn
;
2406 bb
= BLOCK_NUM (insn
);
2408 canon_modify_mem_list
[bb
] =
2409 alloc_EXPR_LIST (VOIDmode
, dest_addr
, canon_modify_mem_list
[bb
]);
2410 canon_modify_mem_list
[bb
] =
2411 alloc_EXPR_LIST (VOIDmode
, dest
, canon_modify_mem_list
[bb
]);
2412 bitmap_set_bit (canon_modify_mem_list_set
, bb
);
2415 /* Record memory modification information for INSN. We do not actually care
2416 about the memory location(s) that are set, or even how they are set (consider
2417 a CALL_INSN). We merely need to record which insns modify memory. */
2420 record_last_mem_set_info (insn
)
2423 int bb
= BLOCK_NUM (insn
);
2425 /* load_killed_in_block_p will handle the case of calls clobbering
2427 modify_mem_list
[bb
] = alloc_INSN_LIST (insn
, modify_mem_list
[bb
]);
2428 bitmap_set_bit (modify_mem_list_set
, bb
);
2430 if (GET_CODE (insn
) == CALL_INSN
)
2432 /* Note that traversals of this loop (other than for free-ing)
2433 will break after encountering a CALL_INSN. So, there's no
2434 need to insert a pair of items, as canon_list_insert does. */
2435 canon_modify_mem_list
[bb
] =
2436 alloc_INSN_LIST (insn
, canon_modify_mem_list
[bb
]);
2437 bitmap_set_bit (canon_modify_mem_list_set
, bb
);
2440 note_stores (PATTERN (insn
), canon_list_insert
, (void*) insn
);
2443 /* Called from compute_hash_table via note_stores to handle one
2444 SET or CLOBBER in an insn. DATA is really the instruction in which
2445 the SET is taking place. */
2448 record_last_set_info (dest
, setter
, data
)
2449 rtx dest
, setter ATTRIBUTE_UNUSED
;
2452 rtx last_set_insn
= (rtx
) data
;
2454 if (GET_CODE (dest
) == SUBREG
)
2455 dest
= SUBREG_REG (dest
);
2457 if (GET_CODE (dest
) == REG
)
2458 record_last_reg_set_info (last_set_insn
, REGNO (dest
));
2459 else if (GET_CODE (dest
) == MEM
2460 /* Ignore pushes, they clobber nothing. */
2461 && ! push_operand (dest
, GET_MODE (dest
)))
2462 record_last_mem_set_info (last_set_insn
);
2465 /* Top level function to create an expression or assignment hash table.
2467 Expression entries are placed in the hash table if
2468 - they are of the form (set (pseudo-reg) src),
2469 - src is something we want to perform GCSE on,
2470 - none of the operands are subsequently modified in the block
2472 Assignment entries are placed in the hash table if
2473 - they are of the form (set (pseudo-reg) src),
2474 - src is something we want to perform const/copy propagation on,
2475 - none of the operands or target are subsequently modified in the block
2477 Currently src must be a pseudo-reg or a const_int.
2479 TABLE is the table computed. */
2482 compute_hash_table_work (table
)
2483 struct hash_table
*table
;
2487 /* While we compute the hash table we also compute a bit array of which
2488 registers are set in which blocks.
2489 ??? This isn't needed during const/copy propagation, but it's cheap to
2491 sbitmap_vector_zero (reg_set_in_block
, last_basic_block
);
2493 /* re-Cache any INSN_LIST nodes we have allocated. */
2494 clear_modify_mem_tables ();
2495 /* Some working arrays used to track first and last set in each block. */
2496 reg_avail_info
= (struct reg_avail_info
*)
2497 gmalloc (max_gcse_regno
* sizeof (struct reg_avail_info
));
2499 for (i
= 0; i
< max_gcse_regno
; ++i
)
2500 reg_avail_info
[i
].last_bb
= NULL
;
2502 FOR_EACH_BB (current_bb
)
2506 int in_libcall_block
;
2508 /* First pass over the instructions records information used to
2509 determine when registers and memory are first and last set.
2510 ??? hard-reg reg_set_in_block computation
2511 could be moved to compute_sets since they currently don't change. */
2513 for (insn
= current_bb
->head
;
2514 insn
&& insn
!= NEXT_INSN (current_bb
->end
);
2515 insn
= NEXT_INSN (insn
))
2517 if (! INSN_P (insn
))
2520 if (GET_CODE (insn
) == CALL_INSN
)
2522 bool clobbers_all
= false;
2523 #ifdef NON_SAVING_SETJMP
2524 if (NON_SAVING_SETJMP
2525 && find_reg_note (insn
, REG_SETJMP
, NULL_RTX
))
2526 clobbers_all
= true;
2529 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
2531 || TEST_HARD_REG_BIT (regs_invalidated_by_call
, regno
))
2532 record_last_reg_set_info (insn
, regno
);
2537 note_stores (PATTERN (insn
), record_last_set_info
, insn
);
2540 /* Insert implicit sets in the hash table. */
2542 && implicit_sets
[current_bb
->index
] != NULL_RTX
)
2543 hash_scan_set (implicit_sets
[current_bb
->index
],
2544 current_bb
->head
, table
);
2546 /* The next pass builds the hash table. */
2548 for (insn
= current_bb
->head
, in_libcall_block
= 0;
2549 insn
&& insn
!= NEXT_INSN (current_bb
->end
);
2550 insn
= NEXT_INSN (insn
))
2553 if (find_reg_note (insn
, REG_LIBCALL
, NULL_RTX
))
2554 in_libcall_block
= 1;
2555 else if (table
->set_p
&& find_reg_note (insn
, REG_RETVAL
, NULL_RTX
))
2556 in_libcall_block
= 0;
2557 hash_scan_insn (insn
, table
, in_libcall_block
);
2558 if (!table
->set_p
&& find_reg_note (insn
, REG_RETVAL
, NULL_RTX
))
2559 in_libcall_block
= 0;
2563 free (reg_avail_info
);
2564 reg_avail_info
= NULL
;
2567 /* Allocate space for the set/expr hash TABLE.
2568 N_INSNS is the number of instructions in the function.
2569 It is used to determine the number of buckets to use.
2570 SET_P determines whether set or expression table will
2574 alloc_hash_table (n_insns
, table
, set_p
)
2576 struct hash_table
*table
;
2581 table
->size
= n_insns
/ 4;
2582 if (table
->size
< 11)
2585 /* Attempt to maintain efficient use of hash table.
2586 Making it an odd number is simplest for now.
2587 ??? Later take some measurements. */
2589 n
= table
->size
* sizeof (struct expr
*);
2590 table
->table
= (struct expr
**) gmalloc (n
);
2591 table
->set_p
= set_p
;
2594 /* Free things allocated by alloc_hash_table. */
2597 free_hash_table (table
)
2598 struct hash_table
*table
;
2600 free (table
->table
);
2603 /* Compute the hash TABLE for doing copy/const propagation or
2604 expression hash table. */
2607 compute_hash_table (table
)
2608 struct hash_table
*table
;
2610 /* Initialize count of number of entries in hash table. */
2612 memset ((char *) table
->table
, 0,
2613 table
->size
* sizeof (struct expr
*));
2615 compute_hash_table_work (table
);
2618 /* Expression tracking support. */
2620 /* Lookup pattern PAT in the expression TABLE.
2621 The result is a pointer to the table entry, or NULL if not found. */
2623 static struct expr
*
2624 lookup_expr (pat
, table
)
2626 struct hash_table
*table
;
2628 int do_not_record_p
;
2629 unsigned int hash
= hash_expr (pat
, GET_MODE (pat
), &do_not_record_p
,
2633 if (do_not_record_p
)
2636 expr
= table
->table
[hash
];
2638 while (expr
&& ! expr_equiv_p (expr
->expr
, pat
))
2639 expr
= expr
->next_same_hash
;
2644 /* Lookup REGNO in the set TABLE. If PAT is non-NULL look for the entry that
2645 matches it, otherwise return the first entry for REGNO. The result is a
2646 pointer to the table entry, or NULL if not found. */
2648 static struct expr
*
2649 lookup_set (regno
, pat
, table
)
2652 struct hash_table
*table
;
2654 unsigned int hash
= hash_set (regno
, table
->size
);
2657 expr
= table
->table
[hash
];
2661 while (expr
&& ! expr_equiv_p (expr
->expr
, pat
))
2662 expr
= expr
->next_same_hash
;
2666 while (expr
&& REGNO (SET_DEST (expr
->expr
)) != regno
)
2667 expr
= expr
->next_same_hash
;
2673 /* Return the next entry for REGNO in list EXPR. */
2675 static struct expr
*
2676 next_set (regno
, expr
)
2681 expr
= expr
->next_same_hash
;
2682 while (expr
&& REGNO (SET_DEST (expr
->expr
)) != regno
);
2687 /* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
2688 types may be mixed. */
2691 free_insn_expr_list_list (listp
)
2696 for (list
= *listp
; list
; list
= next
)
2698 next
= XEXP (list
, 1);
2699 if (GET_CODE (list
) == EXPR_LIST
)
2700 free_EXPR_LIST_node (list
);
2702 free_INSN_LIST_node (list
);
2708 /* Clear canon_modify_mem_list and modify_mem_list tables. */
2710 clear_modify_mem_tables ()
2714 EXECUTE_IF_SET_IN_BITMAP
2715 (modify_mem_list_set
, 0, i
, free_INSN_LIST_list (modify_mem_list
+ i
));
2716 bitmap_clear (modify_mem_list_set
);
2718 EXECUTE_IF_SET_IN_BITMAP
2719 (canon_modify_mem_list_set
, 0, i
,
2720 free_insn_expr_list_list (canon_modify_mem_list
+ i
));
2721 bitmap_clear (canon_modify_mem_list_set
);
2724 /* Release memory used by modify_mem_list_set and canon_modify_mem_list_set. */
2727 free_modify_mem_tables ()
2729 clear_modify_mem_tables ();
2730 free (modify_mem_list
);
2731 free (canon_modify_mem_list
);
2732 modify_mem_list
= 0;
2733 canon_modify_mem_list
= 0;
2736 /* Reset tables used to keep track of what's still available [since the
2737 start of the block]. */
2740 reset_opr_set_tables ()
2742 /* Maintain a bitmap of which regs have been set since beginning of
2744 CLEAR_REG_SET (reg_set_bitmap
);
2746 /* Also keep a record of the last instruction to modify memory.
2747 For now this is very trivial, we only record whether any memory
2748 location has been modified. */
2749 clear_modify_mem_tables ();
2752 /* Return nonzero if the operands of X are not set before INSN in
2753 INSN's basic block. */
2756 oprs_not_set_p (x
, insn
)
2766 code
= GET_CODE (x
);
2782 if (load_killed_in_block_p (BLOCK_FOR_INSN (insn
),
2783 INSN_CUID (insn
), x
, 0))
2786 return oprs_not_set_p (XEXP (x
, 0), insn
);
2789 return ! REGNO_REG_SET_P (reg_set_bitmap
, REGNO (x
));
2795 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
2799 /* If we are about to do the last recursive call
2800 needed at this level, change it into iteration.
2801 This function is called enough to be worth it. */
2803 return oprs_not_set_p (XEXP (x
, i
), insn
);
2805 if (! oprs_not_set_p (XEXP (x
, i
), insn
))
2808 else if (fmt
[i
] == 'E')
2809 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
2810 if (! oprs_not_set_p (XVECEXP (x
, i
, j
), insn
))
2817 /* Mark things set by a CALL. */
2823 if (! CONST_OR_PURE_CALL_P (insn
))
2824 record_last_mem_set_info (insn
);
2827 /* Mark things set by a SET. */
2830 mark_set (pat
, insn
)
2833 rtx dest
= SET_DEST (pat
);
2835 while (GET_CODE (dest
) == SUBREG
2836 || GET_CODE (dest
) == ZERO_EXTRACT
2837 || GET_CODE (dest
) == SIGN_EXTRACT
2838 || GET_CODE (dest
) == STRICT_LOW_PART
)
2839 dest
= XEXP (dest
, 0);
2841 if (GET_CODE (dest
) == REG
)
2842 SET_REGNO_REG_SET (reg_set_bitmap
, REGNO (dest
));
2843 else if (GET_CODE (dest
) == MEM
)
2844 record_last_mem_set_info (insn
);
2846 if (GET_CODE (SET_SRC (pat
)) == CALL
)
2850 /* Record things set by a CLOBBER. */
2853 mark_clobber (pat
, insn
)
2856 rtx clob
= XEXP (pat
, 0);
2858 while (GET_CODE (clob
) == SUBREG
|| GET_CODE (clob
) == STRICT_LOW_PART
)
2859 clob
= XEXP (clob
, 0);
2861 if (GET_CODE (clob
) == REG
)
2862 SET_REGNO_REG_SET (reg_set_bitmap
, REGNO (clob
));
2864 record_last_mem_set_info (insn
);
2867 /* Record things set by INSN.
2868 This data is used by oprs_not_set_p. */
2871 mark_oprs_set (insn
)
2874 rtx pat
= PATTERN (insn
);
2877 if (GET_CODE (pat
) == SET
)
2878 mark_set (pat
, insn
);
2879 else if (GET_CODE (pat
) == PARALLEL
)
2880 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
2882 rtx x
= XVECEXP (pat
, 0, i
);
2884 if (GET_CODE (x
) == SET
)
2886 else if (GET_CODE (x
) == CLOBBER
)
2887 mark_clobber (x
, insn
);
2888 else if (GET_CODE (x
) == CALL
)
2892 else if (GET_CODE (pat
) == CLOBBER
)
2893 mark_clobber (pat
, insn
);
2894 else if (GET_CODE (pat
) == CALL
)
2899 /* Classic GCSE reaching definition support. */
2901 /* Allocate reaching def variables. */
2904 alloc_rd_mem (n_blocks
, n_insns
)
2905 int n_blocks
, n_insns
;
2907 rd_kill
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2908 sbitmap_vector_zero (rd_kill
, n_blocks
);
2910 rd_gen
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2911 sbitmap_vector_zero (rd_gen
, n_blocks
);
2913 reaching_defs
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2914 sbitmap_vector_zero (reaching_defs
, n_blocks
);
2916 rd_out
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_insns
);
2917 sbitmap_vector_zero (rd_out
, n_blocks
);
2920 /* Free reaching def variables. */
2925 sbitmap_vector_free (rd_kill
);
2926 sbitmap_vector_free (rd_gen
);
2927 sbitmap_vector_free (reaching_defs
);
2928 sbitmap_vector_free (rd_out
);
2931 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2934 handle_rd_kill_set (insn
, regno
, bb
)
2939 struct reg_set
*this_reg
;
2941 for (this_reg
= reg_set_table
[regno
]; this_reg
; this_reg
= this_reg
->next
)
2942 if (BLOCK_NUM (this_reg
->insn
) != BLOCK_NUM (insn
))
2943 SET_BIT (rd_kill
[bb
->index
], INSN_CUID (this_reg
->insn
));
2946 /* Compute the set of kill's for reaching definitions. */
2957 For each set bit in `gen' of the block (i.e each insn which
2958 generates a definition in the block)
2959 Call the reg set by the insn corresponding to that bit regx
2960 Look at the linked list starting at reg_set_table[regx]
2961 For each setting of regx in the linked list, which is not in
2963 Set the bit in `kill' corresponding to that insn. */
2965 for (cuid
= 0; cuid
< max_cuid
; cuid
++)
2966 if (TEST_BIT (rd_gen
[bb
->index
], cuid
))
2968 rtx insn
= CUID_INSN (cuid
);
2969 rtx pat
= PATTERN (insn
);
2971 if (GET_CODE (insn
) == CALL_INSN
)
2973 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
2974 if (TEST_HARD_REG_BIT (regs_invalidated_by_call
, regno
))
2975 handle_rd_kill_set (insn
, regno
, bb
);
2978 if (GET_CODE (pat
) == PARALLEL
)
2980 for (i
= XVECLEN (pat
, 0) - 1; i
>= 0; i
--)
2982 enum rtx_code code
= GET_CODE (XVECEXP (pat
, 0, i
));
2984 if ((code
== SET
|| code
== CLOBBER
)
2985 && GET_CODE (XEXP (XVECEXP (pat
, 0, i
), 0)) == REG
)
2986 handle_rd_kill_set (insn
,
2987 REGNO (XEXP (XVECEXP (pat
, 0, i
), 0)),
2991 else if (GET_CODE (pat
) == SET
&& GET_CODE (SET_DEST (pat
)) == REG
)
2992 /* Each setting of this register outside of this block
2993 must be marked in the set of kills in this block. */
2994 handle_rd_kill_set (insn
, REGNO (SET_DEST (pat
)), bb
);
2998 /* Compute the reaching definitions as in
2999 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
3000 Chapter 10. It is the same algorithm as used for computing available
3001 expressions but applied to the gens and kills of reaching definitions. */
3006 int changed
, passes
;
3010 sbitmap_copy (rd_out
[bb
->index
] /*dst*/, rd_gen
[bb
->index
] /*src*/);
3019 sbitmap_union_of_preds (reaching_defs
[bb
->index
], rd_out
, bb
->index
);
3020 changed
|= sbitmap_union_of_diff_cg (rd_out
[bb
->index
], rd_gen
[bb
->index
],
3021 reaching_defs
[bb
->index
], rd_kill
[bb
->index
]);
3027 fprintf (gcse_file
, "reaching def computation: %d passes\n", passes
);
3030 /* Classic GCSE available expression support. */
3032 /* Allocate memory for available expression computation. */
3035 alloc_avail_expr_mem (n_blocks
, n_exprs
)
3036 int n_blocks
, n_exprs
;
3038 ae_kill
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
3039 sbitmap_vector_zero (ae_kill
, n_blocks
);
3041 ae_gen
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
3042 sbitmap_vector_zero (ae_gen
, n_blocks
);
3044 ae_in
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
3045 sbitmap_vector_zero (ae_in
, n_blocks
);
3047 ae_out
= (sbitmap
*) sbitmap_vector_alloc (n_blocks
, n_exprs
);
3048 sbitmap_vector_zero (ae_out
, n_blocks
);
3052 free_avail_expr_mem ()
3054 sbitmap_vector_free (ae_kill
);
3055 sbitmap_vector_free (ae_gen
);
3056 sbitmap_vector_free (ae_in
);
3057 sbitmap_vector_free (ae_out
);
3060 /* Compute the set of available expressions generated in each basic block. */
3063 compute_ae_gen (expr_hash_table
)
3064 struct hash_table
*expr_hash_table
;
3070 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
3071 This is all we have to do because an expression is not recorded if it
3072 is not available, and the only expressions we want to work with are the
3073 ones that are recorded. */
3074 for (i
= 0; i
< expr_hash_table
->size
; i
++)
3075 for (expr
= expr_hash_table
->table
[i
]; expr
!= 0; expr
= expr
->next_same_hash
)
3076 for (occr
= expr
->avail_occr
; occr
!= 0; occr
= occr
->next
)
3077 SET_BIT (ae_gen
[BLOCK_NUM (occr
->insn
)], expr
->bitmap_index
);
3080 /* Return nonzero if expression X is killed in BB. */
3083 expr_killed_p (x
, bb
)
3094 code
= GET_CODE (x
);
3098 return TEST_BIT (reg_set_in_block
[bb
->index
], REGNO (x
));
3101 if (load_killed_in_block_p (bb
, get_max_uid () + 1, x
, 0))
3104 return expr_killed_p (XEXP (x
, 0), bb
);
3122 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
3126 /* If we are about to do the last recursive call
3127 needed at this level, change it into iteration.
3128 This function is called enough to be worth it. */
3130 return expr_killed_p (XEXP (x
, i
), bb
);
3131 else if (expr_killed_p (XEXP (x
, i
), bb
))
3134 else if (fmt
[i
] == 'E')
3135 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
3136 if (expr_killed_p (XVECEXP (x
, i
, j
), bb
))
3143 /* Compute the set of available expressions killed in each basic block. */
3146 compute_ae_kill (ae_gen
, ae_kill
, expr_hash_table
)
3147 sbitmap
*ae_gen
, *ae_kill
;
3148 struct hash_table
*expr_hash_table
;
3155 for (i
= 0; i
< expr_hash_table
->size
; i
++)
3156 for (expr
= expr_hash_table
->table
[i
]; expr
; expr
= expr
->next_same_hash
)
3158 /* Skip EXPR if generated in this block. */
3159 if (TEST_BIT (ae_gen
[bb
->index
], expr
->bitmap_index
))
3162 if (expr_killed_p (expr
->expr
, bb
))
3163 SET_BIT (ae_kill
[bb
->index
], expr
->bitmap_index
);
3167 /* Actually perform the Classic GCSE optimizations. */
3169 /* Return nonzero if occurrence OCCR of expression EXPR reaches block BB.
3171 CHECK_SELF_LOOP is nonzero if we should consider a block reaching itself
3172 as a positive reach. We want to do this when there are two computations
3173 of the expression in the block.
3175 VISITED is a pointer to a working buffer for tracking which BB's have
3176 been visited. It is NULL for the top-level call.
3178 We treat reaching expressions that go through blocks containing the same
3179 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
3180 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3181 2 as not reaching. The intent is to improve the probability of finding
3182 only one reaching expression and to reduce register lifetimes by picking
3183 the closest such expression. */
3186 expr_reaches_here_p_work (occr
, expr
, bb
, check_self_loop
, visited
)
3190 int check_self_loop
;
3195 for (pred
= bb
->pred
; pred
!= NULL
; pred
= pred
->pred_next
)
3197 basic_block pred_bb
= pred
->src
;
3199 if (visited
[pred_bb
->index
])
3200 /* This predecessor has already been visited. Nothing to do. */
3202 else if (pred_bb
== bb
)
3204 /* BB loops on itself. */
3206 && TEST_BIT (ae_gen
[pred_bb
->index
], expr
->bitmap_index
)
3207 && BLOCK_NUM (occr
->insn
) == pred_bb
->index
)
3210 visited
[pred_bb
->index
] = 1;
3213 /* Ignore this predecessor if it kills the expression. */
3214 else if (TEST_BIT (ae_kill
[pred_bb
->index
], expr
->bitmap_index
))
3215 visited
[pred_bb
->index
] = 1;
3217 /* Does this predecessor generate this expression? */
3218 else if (TEST_BIT (ae_gen
[pred_bb
->index
], expr
->bitmap_index
))
3220 /* Is this the occurrence we're looking for?
3221 Note that there's only one generating occurrence per block
3222 so we just need to check the block number. */
3223 if (BLOCK_NUM (occr
->insn
) == pred_bb
->index
)
3226 visited
[pred_bb
->index
] = 1;
3229 /* Neither gen nor kill. */
3232 visited
[pred_bb
->index
] = 1;
3233 if (expr_reaches_here_p_work (occr
, expr
, pred_bb
, check_self_loop
,
3240 /* All paths have been checked. */
3244 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3245 memory allocated for that function is returned. */
3248 expr_reaches_here_p (occr
, expr
, bb
, check_self_loop
)
3252 int check_self_loop
;
3255 char *visited
= (char *) xcalloc (last_basic_block
, 1);
3257 rval
= expr_reaches_here_p_work (occr
, expr
, bb
, check_self_loop
, visited
);
3263 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3264 If there is more than one such instruction, return NULL.
3266 Called only by handle_avail_expr. */
3269 computing_insn (expr
, insn
)
3273 basic_block bb
= BLOCK_FOR_INSN (insn
);
3275 if (expr
->avail_occr
->next
== NULL
)
3277 if (BLOCK_FOR_INSN (expr
->avail_occr
->insn
) == bb
)
3278 /* The available expression is actually itself
3279 (i.e. a loop in the flow graph) so do nothing. */
3282 /* (FIXME) Case that we found a pattern that was created by
3283 a substitution that took place. */
3284 return expr
->avail_occr
->insn
;
3288 /* Pattern is computed more than once.
3289 Search backwards from this insn to see how many of these
3290 computations actually reach this insn. */
3292 rtx insn_computes_expr
= NULL
;
3295 for (occr
= expr
->avail_occr
; occr
!= NULL
; occr
= occr
->next
)
3297 if (BLOCK_FOR_INSN (occr
->insn
) == bb
)
3299 /* The expression is generated in this block.
3300 The only time we care about this is when the expression
3301 is generated later in the block [and thus there's a loop].
3302 We let the normal cse pass handle the other cases. */
3303 if (INSN_CUID (insn
) < INSN_CUID (occr
->insn
)
3304 && expr_reaches_here_p (occr
, expr
, bb
, 1))
3310 insn_computes_expr
= occr
->insn
;
3313 else if (expr_reaches_here_p (occr
, expr
, bb
, 0))
3319 insn_computes_expr
= occr
->insn
;
3323 if (insn_computes_expr
== NULL
)
3326 return insn_computes_expr
;
3330 /* Return nonzero if the definition in DEF_INSN can reach INSN.
3331 Only called by can_disregard_other_sets. */
3334 def_reaches_here_p (insn
, def_insn
)
3339 if (TEST_BIT (reaching_defs
[BLOCK_NUM (insn
)], INSN_CUID (def_insn
)))
3342 if (BLOCK_NUM (insn
) == BLOCK_NUM (def_insn
))
3344 if (INSN_CUID (def_insn
) < INSN_CUID (insn
))
3346 if (GET_CODE (PATTERN (def_insn
)) == PARALLEL
)
3348 else if (GET_CODE (PATTERN (def_insn
)) == CLOBBER
)
3349 reg
= XEXP (PATTERN (def_insn
), 0);
3350 else if (GET_CODE (PATTERN (def_insn
)) == SET
)
3351 reg
= SET_DEST (PATTERN (def_insn
));
3355 return ! reg_set_between_p (reg
, NEXT_INSN (def_insn
), insn
);
3364 /* Return nonzero if *ADDR_THIS_REG can only have one value at INSN. The
3365 value returned is the number of definitions that reach INSN. Returning a
3366 value of zero means that [maybe] more than one definition reaches INSN and
3367 the caller can't perform whatever optimization it is trying. i.e. it is
3368 always safe to return zero. */
3371 can_disregard_other_sets (addr_this_reg
, insn
, for_combine
)
3372 struct reg_set
**addr_this_reg
;
3376 int number_of_reaching_defs
= 0;
3377 struct reg_set
*this_reg
;
3379 for (this_reg
= *addr_this_reg
; this_reg
!= 0; this_reg
= this_reg
->next
)
3380 if (def_reaches_here_p (insn
, this_reg
->insn
))
3382 number_of_reaching_defs
++;
3383 /* Ignore parallels for now. */
3384 if (GET_CODE (PATTERN (this_reg
->insn
)) == PARALLEL
)
3388 && (GET_CODE (PATTERN (this_reg
->insn
)) == CLOBBER
3389 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg
->insn
)),
3390 SET_SRC (PATTERN (insn
)))))
3391 /* A setting of the reg to a different value reaches INSN. */
3394 if (number_of_reaching_defs
> 1)
3396 /* If in this setting the value the register is being set to is
3397 equal to the previous value the register was set to and this
3398 setting reaches the insn we are trying to do the substitution
3399 on then we are ok. */
3400 if (GET_CODE (PATTERN (this_reg
->insn
)) == CLOBBER
)
3402 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg
->insn
)),
3403 SET_SRC (PATTERN (insn
))))
3407 *addr_this_reg
= this_reg
;
3410 return number_of_reaching_defs
;
3413 /* Expression computed by insn is available and the substitution is legal,
3414 so try to perform the substitution.
3416 The result is nonzero if any changes were made. */
3419 handle_avail_expr (insn
, expr
)
3423 rtx pat
, insn_computes_expr
, expr_set
;
3425 struct reg_set
*this_reg
;
3426 int found_setting
, use_src
;
3429 /* We only handle the case where one computation of the expression
3430 reaches this instruction. */
3431 insn_computes_expr
= computing_insn (expr
, insn
);
3432 if (insn_computes_expr
== NULL
)
3434 expr_set
= single_set (insn_computes_expr
);
3441 /* At this point we know only one computation of EXPR outside of this
3442 block reaches this insn. Now try to find a register that the
3443 expression is computed into. */
3444 if (GET_CODE (SET_SRC (expr_set
)) == REG
)
3446 /* This is the case when the available expression that reaches
3447 here has already been handled as an available expression. */
3448 unsigned int regnum_for_replacing
3449 = REGNO (SET_SRC (expr_set
));
3451 /* If the register was created by GCSE we can't use `reg_set_table',
3452 however we know it's set only once. */
3453 if (regnum_for_replacing
>= max_gcse_regno
3454 /* If the register the expression is computed into is set only once,
3455 or only one set reaches this insn, we can use it. */
3456 || (((this_reg
= reg_set_table
[regnum_for_replacing
]),
3457 this_reg
->next
== NULL
)
3458 || can_disregard_other_sets (&this_reg
, insn
, 0)))
3467 unsigned int regnum_for_replacing
3468 = REGNO (SET_DEST (expr_set
));
3470 /* This shouldn't happen. */
3471 if (regnum_for_replacing
>= max_gcse_regno
)
3474 this_reg
= reg_set_table
[regnum_for_replacing
];
3476 /* If the register the expression is computed into is set only once,
3477 or only one set reaches this insn, use it. */
3478 if (this_reg
->next
== NULL
3479 || can_disregard_other_sets (&this_reg
, insn
, 0))
3485 pat
= PATTERN (insn
);
3487 to
= SET_SRC (expr_set
);
3489 to
= SET_DEST (expr_set
);
3490 changed
= validate_change (insn
, &SET_SRC (pat
), to
, 0);
3492 /* We should be able to ignore the return code from validate_change but
3493 to play it safe we check. */
3497 if (gcse_file
!= NULL
)
3499 fprintf (gcse_file
, "GCSE: Replacing the source in insn %d with",
3501 fprintf (gcse_file
, " reg %d %s insn %d\n",
3502 REGNO (to
), use_src
? "from" : "set in",
3503 INSN_UID (insn_computes_expr
));
3508 /* The register that the expr is computed into is set more than once. */
3509 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3511 /* Insert an insn after insnx that copies the reg set in insnx
3512 into a new pseudo register call this new register REGN.
3513 From insnb until end of basic block or until REGB is set
3514 replace all uses of REGB with REGN. */
3517 to
= gen_reg_rtx (GET_MODE (SET_DEST (expr_set
)));
3519 /* Generate the new insn. */
3520 /* ??? If the change fails, we return 0, even though we created
3521 an insn. I think this is ok. */
3523 = emit_insn_after (gen_rtx_SET (VOIDmode
, to
,
3524 SET_DEST (expr_set
)),
3525 insn_computes_expr
);
3527 /* Keep register set table up to date. */
3528 record_one_set (REGNO (to
), new_insn
);
3530 gcse_create_count
++;
3531 if (gcse_file
!= NULL
)
3533 fprintf (gcse_file
, "GCSE: Creating insn %d to copy value of reg %d",
3534 INSN_UID (NEXT_INSN (insn_computes_expr
)),
3535 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr
)))));
3536 fprintf (gcse_file
, ", computed in insn %d,\n",
3537 INSN_UID (insn_computes_expr
));
3538 fprintf (gcse_file
, " into newly allocated reg %d\n",
3542 pat
= PATTERN (insn
);
3544 /* Do register replacement for INSN. */
3545 changed
= validate_change (insn
, &SET_SRC (pat
),
3547 (NEXT_INSN (insn_computes_expr
))),
3550 /* We should be able to ignore the return code from validate_change but
3551 to play it safe we check. */
3555 if (gcse_file
!= NULL
)
3558 "GCSE: Replacing the source in insn %d with reg %d ",
3560 REGNO (SET_DEST (PATTERN (NEXT_INSN
3561 (insn_computes_expr
)))));
3562 fprintf (gcse_file
, "set in insn %d\n",
3563 INSN_UID (insn_computes_expr
));
3571 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3572 the dataflow analysis has been done.
3574 The result is nonzero if a change was made. */
3583 /* Note we start at block 1. */
3585 if (ENTRY_BLOCK_PTR
->next_bb
== EXIT_BLOCK_PTR
)
3589 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
->next_bb
->next_bb
, EXIT_BLOCK_PTR
, next_bb
)
3591 /* Reset tables used to keep track of what's still valid [since the
3592 start of the block]. */
3593 reset_opr_set_tables ();
3595 for (insn
= bb
->head
;
3596 insn
!= NULL
&& insn
!= NEXT_INSN (bb
->end
);
3597 insn
= NEXT_INSN (insn
))
3599 /* Is insn of form (set (pseudo-reg) ...)? */
3600 if (GET_CODE (insn
) == INSN
3601 && GET_CODE (PATTERN (insn
)) == SET
3602 && GET_CODE (SET_DEST (PATTERN (insn
))) == REG
3603 && REGNO (SET_DEST (PATTERN (insn
))) >= FIRST_PSEUDO_REGISTER
)
3605 rtx pat
= PATTERN (insn
);
3606 rtx src
= SET_SRC (pat
);
3609 if (want_to_gcse_p (src
)
3610 /* Is the expression recorded? */
3611 && ((expr
= lookup_expr (src
, &expr_hash_table
)) != NULL
)
3612 /* Is the expression available [at the start of the
3614 && TEST_BIT (ae_in
[bb
->index
], expr
->bitmap_index
)
3615 /* Are the operands unchanged since the start of the
3617 && oprs_not_set_p (src
, insn
))
3618 changed
|= handle_avail_expr (insn
, expr
);
3621 /* Keep track of everything modified by this insn. */
3622 /* ??? Need to be careful w.r.t. mods done to INSN. */
3624 mark_oprs_set (insn
);
3631 /* Top level routine to perform one classic GCSE pass.
3633 Return nonzero if a change was made. */
3636 one_classic_gcse_pass (pass
)
3641 gcse_subst_count
= 0;
3642 gcse_create_count
= 0;
3644 alloc_hash_table (max_cuid
, &expr_hash_table
, 0);
3645 alloc_rd_mem (last_basic_block
, max_cuid
);
3646 compute_hash_table (&expr_hash_table
);
3648 dump_hash_table (gcse_file
, "Expression", &expr_hash_table
);
3650 if (expr_hash_table
.n_elems
> 0)
3654 alloc_avail_expr_mem (last_basic_block
, expr_hash_table
.n_elems
);
3655 compute_ae_gen (&expr_hash_table
);
3656 compute_ae_kill (ae_gen
, ae_kill
, &expr_hash_table
);
3657 compute_available (ae_gen
, ae_kill
, ae_out
, ae_in
);
3658 changed
= classic_gcse ();
3659 free_avail_expr_mem ();
3663 free_hash_table (&expr_hash_table
);
3667 fprintf (gcse_file
, "\n");
3668 fprintf (gcse_file
, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3669 current_function_name
, pass
, bytes_used
, gcse_subst_count
);
3670 fprintf (gcse_file
, "%d insns created\n", gcse_create_count
);
3676 /* Compute copy/constant propagation working variables. */
3678 /* Local properties of assignments. */
3679 static sbitmap
*cprop_pavloc
;
3680 static sbitmap
*cprop_absaltered
;
3682 /* Global properties of assignments (computed from the local properties). */
3683 static sbitmap
*cprop_avin
;
3684 static sbitmap
*cprop_avout
;
3686 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3687 basic blocks. N_SETS is the number of sets. */
3690 alloc_cprop_mem (n_blocks
, n_sets
)
3691 int n_blocks
, n_sets
;
3693 cprop_pavloc
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3694 cprop_absaltered
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3696 cprop_avin
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3697 cprop_avout
= sbitmap_vector_alloc (n_blocks
, n_sets
);
3700 /* Free vars used by copy/const propagation. */
3705 sbitmap_vector_free (cprop_pavloc
);
3706 sbitmap_vector_free (cprop_absaltered
);
3707 sbitmap_vector_free (cprop_avin
);
3708 sbitmap_vector_free (cprop_avout
);
3711 /* For each block, compute whether X is transparent. X is either an
3712 expression or an assignment [though we don't care which, for this context
3713 an assignment is treated as an expression]. For each block where an
3714 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3718 compute_transp (x
, indx
, bmap
, set_p
)
3730 /* repeat is used to turn tail-recursion into iteration since GCC
3731 can't do it when there's no return value. */
3737 code
= GET_CODE (x
);
3743 if (REGNO (x
) < FIRST_PSEUDO_REGISTER
)
3746 if (TEST_BIT (reg_set_in_block
[bb
->index
], REGNO (x
)))
3747 SET_BIT (bmap
[bb
->index
], indx
);
3751 for (r
= reg_set_table
[REGNO (x
)]; r
!= NULL
; r
= r
->next
)
3752 SET_BIT (bmap
[BLOCK_NUM (r
->insn
)], indx
);
3757 if (REGNO (x
) < FIRST_PSEUDO_REGISTER
)
3760 if (TEST_BIT (reg_set_in_block
[bb
->index
], REGNO (x
)))
3761 RESET_BIT (bmap
[bb
->index
], indx
);
3765 for (r
= reg_set_table
[REGNO (x
)]; r
!= NULL
; r
= r
->next
)
3766 RESET_BIT (bmap
[BLOCK_NUM (r
->insn
)], indx
);
3775 rtx list_entry
= canon_modify_mem_list
[bb
->index
];
3779 rtx dest
, dest_addr
;
3781 if (GET_CODE (XEXP (list_entry
, 0)) == CALL_INSN
)
3784 SET_BIT (bmap
[bb
->index
], indx
);
3786 RESET_BIT (bmap
[bb
->index
], indx
);
3789 /* LIST_ENTRY must be an INSN of some kind that sets memory.
3790 Examine each hunk of memory that is modified. */
3792 dest
= XEXP (list_entry
, 0);
3793 list_entry
= XEXP (list_entry
, 1);
3794 dest_addr
= XEXP (list_entry
, 0);
3796 if (canon_true_dependence (dest
, GET_MODE (dest
), dest_addr
,
3797 x
, rtx_addr_varies_p
))
3800 SET_BIT (bmap
[bb
->index
], indx
);
3802 RESET_BIT (bmap
[bb
->index
], indx
);
3805 list_entry
= XEXP (list_entry
, 1);
3828 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
3832 /* If we are about to do the last recursive call
3833 needed at this level, change it into iteration.
3834 This function is called enough to be worth it. */
3841 compute_transp (XEXP (x
, i
), indx
, bmap
, set_p
);
3843 else if (fmt
[i
] == 'E')
3844 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
3845 compute_transp (XVECEXP (x
, i
, j
), indx
, bmap
, set_p
);
3849 /* Top level routine to do the dataflow analysis needed by copy/const
3853 compute_cprop_data ()
3855 compute_local_properties (cprop_absaltered
, cprop_pavloc
, NULL
, &set_hash_table
);
3856 compute_available (cprop_pavloc
, cprop_absaltered
,
3857 cprop_avout
, cprop_avin
);
3860 /* Copy/constant propagation. */
3862 /* Maximum number of register uses in an insn that we handle. */
3865 /* Table of uses found in an insn.
3866 Allocated statically to avoid alloc/free complexity and overhead. */
3867 static struct reg_use reg_use_table
[MAX_USES
];
3869 /* Index into `reg_use_table' while building it. */
3870 static int reg_use_count
;
3872 /* Set up a list of register numbers used in INSN. The found uses are stored
3873 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3874 and contains the number of uses in the table upon exit.
3876 ??? If a register appears multiple times we will record it multiple times.
3877 This doesn't hurt anything but it will slow things down. */
3880 find_used_regs (xptr
, data
)
3882 void *data ATTRIBUTE_UNUSED
;
3889 /* repeat is used to turn tail-recursion into iteration since GCC
3890 can't do it when there's no return value. */
3895 code
= GET_CODE (x
);
3898 if (reg_use_count
== MAX_USES
)
3901 reg_use_table
[reg_use_count
].reg_rtx
= x
;
3905 /* Recursively scan the operands of this expression. */
3907 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
3911 /* If we are about to do the last recursive call
3912 needed at this level, change it into iteration.
3913 This function is called enough to be worth it. */
3920 find_used_regs (&XEXP (x
, i
), data
);
3922 else if (fmt
[i
] == 'E')
3923 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
3924 find_used_regs (&XVECEXP (x
, i
, j
), data
);
3928 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3929 Returns nonzero is successful. */
3932 try_replace_reg (from
, to
, insn
)
3935 rtx note
= find_reg_equal_equiv_note (insn
);
3938 rtx set
= single_set (insn
);
3940 validate_replace_src_group (from
, to
, insn
);
3941 if (num_changes_pending () && apply_change_group ())
3944 if (!success
&& set
&& reg_mentioned_p (from
, SET_SRC (set
)))
3946 /* If above failed and this is a single set, try to simplify the source of
3947 the set given our substitution. We could perhaps try this for multiple
3948 SETs, but it probably won't buy us anything. */
3949 src
= simplify_replace_rtx (SET_SRC (set
), from
, to
);
3951 if (!rtx_equal_p (src
, SET_SRC (set
))
3952 && validate_change (insn
, &SET_SRC (set
), src
, 0))
3955 /* If we've failed to do replacement, have a single SET, and don't already
3956 have a note, add a REG_EQUAL note to not lose information. */
3957 if (!success
&& note
== 0 && set
!= 0)
3958 note
= set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
3961 /* If there is already a NOTE, update the expression in it with our
3964 XEXP (note
, 0) = simplify_replace_rtx (XEXP (note
, 0), from
, to
);
3966 /* REG_EQUAL may get simplified into register.
3967 We don't allow that. Remove that note. This code ought
3968 not to happen, because previous code ought to synthesize
3969 reg-reg move, but be on the safe side. */
3970 if (note
&& REG_P (XEXP (note
, 0)))
3971 remove_note (insn
, note
);
3976 /* Find a set of REGNOs that are available on entry to INSN's block. Returns
3977 NULL no such set is found. */
3979 static struct expr
*
3980 find_avail_set (regno
, insn
)
3984 /* SET1 contains the last set found that can be returned to the caller for
3985 use in a substitution. */
3986 struct expr
*set1
= 0;
3988 /* Loops are not possible here. To get a loop we would need two sets
3989 available at the start of the block containing INSN. ie we would
3990 need two sets like this available at the start of the block:
3992 (set (reg X) (reg Y))
3993 (set (reg Y) (reg X))
3995 This can not happen since the set of (reg Y) would have killed the
3996 set of (reg X) making it unavailable at the start of this block. */
4000 struct expr
*set
= lookup_set (regno
, NULL_RTX
, &set_hash_table
);
4002 /* Find a set that is available at the start of the block
4003 which contains INSN. */
4006 if (TEST_BIT (cprop_avin
[BLOCK_NUM (insn
)], set
->bitmap_index
))
4008 set
= next_set (regno
, set
);
4011 /* If no available set was found we've reached the end of the
4012 (possibly empty) copy chain. */
4016 if (GET_CODE (set
->expr
) != SET
)
4019 src
= SET_SRC (set
->expr
);
4021 /* We know the set is available.
4022 Now check that SRC is ANTLOC (i.e. none of the source operands
4023 have changed since the start of the block).
4025 If the source operand changed, we may still use it for the next
4026 iteration of this loop, but we may not use it for substitutions. */
4028 if (CONSTANT_P (src
) || oprs_not_set_p (src
, insn
))
4031 /* If the source of the set is anything except a register, then
4032 we have reached the end of the copy chain. */
4033 if (GET_CODE (src
) != REG
)
4036 /* Follow the copy chain, ie start another iteration of the loop
4037 and see if we have an available copy into SRC. */
4038 regno
= REGNO (src
);
4041 /* SET1 holds the last set that was available and anticipatable at
4046 /* Subroutine of cprop_insn that tries to propagate constants into
4047 JUMP_INSNS. JUMP must be a conditional jump. If SETCC is non-NULL
4048 it is the instruction that immediately precedes JUMP, and must be a
4049 single SET of a register. FROM is what we will try to replace,
4050 SRC is the constant we will try to substitute for it. Returns nonzero
4051 if a change was made. */
4054 cprop_jump (bb
, setcc
, jump
, from
, src
)
4062 rtx set
= pc_set (jump
);
4064 /* First substitute in the INSN condition as the SET_SRC of the JUMP,
4065 then substitute that given values in this expanded JUMP. */
4067 && !modified_between_p (from
, setcc
, jump
)
4068 && !modified_between_p (src
, setcc
, jump
))
4070 rtx setcc_set
= single_set (setcc
);
4071 new_set
= simplify_replace_rtx (SET_SRC (set
),
4072 SET_DEST (setcc_set
),
4073 SET_SRC (setcc_set
));
4078 new = simplify_replace_rtx (new_set
, from
, src
);
4080 /* If no simplification can be made, then try the next
4082 if (rtx_equal_p (new, new_set
) || rtx_equal_p (new, SET_SRC (set
)))
4085 /* If this is now a no-op delete it, otherwise this must be a valid insn. */
4090 /* Ensure the value computed inside the jump insn to be equivalent
4091 to one computed by setcc. */
4093 && modified_in_p (new, setcc
))
4095 if (! validate_change (jump
, &SET_SRC (set
), new, 0))
4098 /* If this has turned into an unconditional jump,
4099 then put a barrier after it so that the unreachable
4100 code will be deleted. */
4101 if (GET_CODE (SET_SRC (set
)) == LABEL_REF
)
4102 emit_barrier_after (jump
);
4106 /* Delete the cc0 setter. */
4107 if (setcc
!= NULL
&& CC0_P (SET_DEST (single_set (setcc
))))
4108 delete_insn (setcc
);
4111 run_jump_opt_after_gcse
= 1;
4114 if (gcse_file
!= NULL
)
4117 "CONST-PROP: Replacing reg %d in jump_insn %d with constant ",
4118 REGNO (from
), INSN_UID (jump
));
4119 print_rtl (gcse_file
, src
);
4120 fprintf (gcse_file
, "\n");
4122 purge_dead_edges (bb
);
4128 constprop_register (insn
, from
, to
, alter_jumps
)
4136 /* Check for reg or cc0 setting instructions followed by
4137 conditional branch instructions first. */
4139 && (sset
= single_set (insn
)) != NULL
4141 && any_condjump_p (NEXT_INSN (insn
)) && onlyjump_p (NEXT_INSN (insn
)))
4143 rtx dest
= SET_DEST (sset
);
4144 if ((REG_P (dest
) || CC0_P (dest
))
4145 && cprop_jump (BLOCK_FOR_INSN (insn
), insn
, NEXT_INSN (insn
), from
, to
))
4149 /* Handle normal insns next. */
4150 if (GET_CODE (insn
) == INSN
4151 && try_replace_reg (from
, to
, insn
))
4154 /* Try to propagate a CONST_INT into a conditional jump.
4155 We're pretty specific about what we will handle in this
4156 code, we can extend this as necessary over time.
4158 Right now the insn in question must look like
4159 (set (pc) (if_then_else ...)) */
4160 else if (alter_jumps
&& any_condjump_p (insn
) && onlyjump_p (insn
))
4161 return cprop_jump (BLOCK_FOR_INSN (insn
), NULL
, insn
, from
, to
);
4165 /* Perform constant and copy propagation on INSN.
4166 The result is nonzero if a change was made. */
4169 cprop_insn (insn
, alter_jumps
)
4173 struct reg_use
*reg_used
;
4181 note_uses (&PATTERN (insn
), find_used_regs
, NULL
);
4183 note
= find_reg_equal_equiv_note (insn
);
4185 /* We may win even when propagating constants into notes. */
4187 find_used_regs (&XEXP (note
, 0), NULL
);
4189 for (reg_used
= ®_use_table
[0]; reg_use_count
> 0;
4190 reg_used
++, reg_use_count
--)
4192 unsigned int regno
= REGNO (reg_used
->reg_rtx
);
4196 /* Ignore registers created by GCSE.
4197 We do this because ... */
4198 if (regno
>= max_gcse_regno
)
4201 /* If the register has already been set in this block, there's
4202 nothing we can do. */
4203 if (! oprs_not_set_p (reg_used
->reg_rtx
, insn
))
4206 /* Find an assignment that sets reg_used and is available
4207 at the start of the block. */
4208 set
= find_avail_set (regno
, insn
);
4213 /* ??? We might be able to handle PARALLELs. Later. */
4214 if (GET_CODE (pat
) != SET
)
4217 src
= SET_SRC (pat
);
4219 /* Constant propagation. */
4220 if (CONSTANT_P (src
))
4222 if (constprop_register (insn
, reg_used
->reg_rtx
, src
, alter_jumps
))
4226 if (gcse_file
!= NULL
)
4228 fprintf (gcse_file
, "GLOBAL CONST-PROP: Replacing reg %d in ", regno
);
4229 fprintf (gcse_file
, "insn %d with constant ", INSN_UID (insn
));
4230 print_rtl (gcse_file
, src
);
4231 fprintf (gcse_file
, "\n");
4235 else if (GET_CODE (src
) == REG
4236 && REGNO (src
) >= FIRST_PSEUDO_REGISTER
4237 && REGNO (src
) != regno
)
4239 if (try_replace_reg (reg_used
->reg_rtx
, src
, insn
))
4243 if (gcse_file
!= NULL
)
4245 fprintf (gcse_file
, "GLOBAL COPY-PROP: Replacing reg %d in insn %d",
4246 regno
, INSN_UID (insn
));
4247 fprintf (gcse_file
, " with reg %d\n", REGNO (src
));
4250 /* The original insn setting reg_used may or may not now be
4251 deletable. We leave the deletion to flow. */
4252 /* FIXME: If it turns out that the insn isn't deletable,
4253 then we may have unnecessarily extended register lifetimes
4254 and made things worse. */
4262 /* Like find_used_regs, but avoid recording uses that appear in
4263 input-output contexts such as zero_extract or pre_dec. This
4264 restricts the cases we consider to those for which local cprop
4265 can legitimately make replacements. */
4268 local_cprop_find_used_regs (xptr
, data
)
4277 switch (GET_CODE (x
))
4281 case STRICT_LOW_PART
:
4290 /* Can only legitimately appear this early in the context of
4291 stack pushes for function arguments, but handle all of the
4292 codes nonetheless. */
4296 /* Setting a subreg of a register larger than word_mode leaves
4297 the non-written words unchanged. */
4298 if (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x
))) > BITS_PER_WORD
)
4306 find_used_regs (xptr
, data
);
4309 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
4310 their REG_EQUAL notes need updating. */
4313 do_local_cprop (x
, insn
, alter_jumps
, libcall_sp
)
4319 rtx newreg
= NULL
, newcnst
= NULL
;
4321 /* Rule out USE instructions and ASM statements as we don't want to
4322 change the hard registers mentioned. */
4323 if (GET_CODE (x
) == REG
4324 && (REGNO (x
) >= FIRST_PSEUDO_REGISTER
4325 || (GET_CODE (PATTERN (insn
)) != USE
4326 && asm_noperands (PATTERN (insn
)) < 0)))
4328 cselib_val
*val
= cselib_lookup (x
, GET_MODE (x
), 0);
4329 struct elt_loc_list
*l
;
4333 for (l
= val
->locs
; l
; l
= l
->next
)
4335 rtx this_rtx
= l
->loc
;
4341 if (CONSTANT_P (this_rtx
)
4342 && GET_CODE (this_rtx
) != CONSTANT_P_RTX
)
4344 if (REG_P (this_rtx
) && REGNO (this_rtx
) >= FIRST_PSEUDO_REGISTER
4345 /* Don't copy propagate if it has attached REG_EQUIV note.
4346 At this point this only function parameters should have
4347 REG_EQUIV notes and if the argument slot is used somewhere
4348 explicitly, it means address of parameter has been taken,
4349 so we should not extend the lifetime of the pseudo. */
4350 && (!(note
= find_reg_note (l
->setting_insn
, REG_EQUIV
, NULL_RTX
))
4351 || GET_CODE (XEXP (note
, 0)) != MEM
))
4354 if (newcnst
&& constprop_register (insn
, x
, newcnst
, alter_jumps
))
4356 /* If we find a case where we can't fix the retval REG_EQUAL notes
4357 match the new register, we either have to abandon this replacement
4358 or fix delete_trivially_dead_insns to preserve the setting insn,
4359 or make it delete the REG_EUAQL note, and fix up all passes that
4360 require the REG_EQUAL note there. */
4361 if (!adjust_libcall_notes (x
, newcnst
, insn
, libcall_sp
))
4363 if (gcse_file
!= NULL
)
4365 fprintf (gcse_file
, "LOCAL CONST-PROP: Replacing reg %d in ",
4367 fprintf (gcse_file
, "insn %d with constant ",
4369 print_rtl (gcse_file
, newcnst
);
4370 fprintf (gcse_file
, "\n");
4375 else if (newreg
&& newreg
!= x
&& try_replace_reg (x
, newreg
, insn
))
4377 adjust_libcall_notes (x
, newreg
, insn
, libcall_sp
);
4378 if (gcse_file
!= NULL
)
4381 "LOCAL COPY-PROP: Replacing reg %d in insn %d",
4382 REGNO (x
), INSN_UID (insn
));
4383 fprintf (gcse_file
, " with reg %d\n", REGNO (newreg
));
4392 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
4393 their REG_EQUAL notes need updating to reflect that OLDREG has been
4394 replaced with NEWVAL in INSN. Return true if all substitutions could
4397 adjust_libcall_notes (oldreg
, newval
, insn
, libcall_sp
)
4398 rtx oldreg
, newval
, insn
, *libcall_sp
;
4402 while ((end
= *libcall_sp
++))
4404 rtx note
= find_reg_equal_equiv_note (end
);
4411 if (reg_set_between_p (newval
, PREV_INSN (insn
), end
))
4415 note
= find_reg_equal_equiv_note (end
);
4418 if (reg_mentioned_p (newval
, XEXP (note
, 0)))
4421 while ((end
= *libcall_sp
++));
4425 XEXP (note
, 0) = replace_rtx (XEXP (note
, 0), oldreg
, newval
);
4431 #define MAX_NESTED_LIBCALLS 9
4434 local_cprop_pass (alter_jumps
)
4438 struct reg_use
*reg_used
;
4439 rtx libcall_stack
[MAX_NESTED_LIBCALLS
+ 1], *libcall_sp
;
4440 bool changed
= false;
4443 libcall_sp
= &libcall_stack
[MAX_NESTED_LIBCALLS
];
4445 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
4449 rtx note
= find_reg_note (insn
, REG_LIBCALL
, NULL_RTX
);
4453 if (libcall_sp
== libcall_stack
)
4455 *--libcall_sp
= XEXP (note
, 0);
4457 note
= find_reg_note (insn
, REG_RETVAL
, NULL_RTX
);
4460 note
= find_reg_equal_equiv_note (insn
);
4464 note_uses (&PATTERN (insn
), local_cprop_find_used_regs
, NULL
);
4466 local_cprop_find_used_regs (&XEXP (note
, 0), NULL
);
4468 for (reg_used
= ®_use_table
[0]; reg_use_count
> 0;
4469 reg_used
++, reg_use_count
--)
4470 if (do_local_cprop (reg_used
->reg_rtx
, insn
, alter_jumps
,
4477 while (reg_use_count
);
4479 cselib_process_insn (insn
);
4482 /* Global analysis may get into infinite loops for unreachable blocks. */
4483 if (changed
&& alter_jumps
)
4485 delete_unreachable_blocks ();
4486 free_reg_set_mem ();
4487 alloc_reg_set_mem (max_reg_num ());
4488 compute_sets (get_insns ());
4492 /* Forward propagate copies. This includes copies and constants. Return
4493 nonzero if a change was made. */
4503 /* Note we start at block 1. */
4504 if (ENTRY_BLOCK_PTR
->next_bb
== EXIT_BLOCK_PTR
)
4506 if (gcse_file
!= NULL
)
4507 fprintf (gcse_file
, "\n");
4512 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
->next_bb
->next_bb
, EXIT_BLOCK_PTR
, next_bb
)
4514 /* Reset tables used to keep track of what's still valid [since the
4515 start of the block]. */
4516 reset_opr_set_tables ();
4518 for (insn
= bb
->head
;
4519 insn
!= NULL
&& insn
!= NEXT_INSN (bb
->end
);
4520 insn
= NEXT_INSN (insn
))
4523 changed
|= cprop_insn (insn
, alter_jumps
);
4525 /* Keep track of everything modified by this insn. */
4526 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4527 call mark_oprs_set if we turned the insn into a NOTE. */
4528 if (GET_CODE (insn
) != NOTE
)
4529 mark_oprs_set (insn
);
4533 if (gcse_file
!= NULL
)
4534 fprintf (gcse_file
, "\n");
4539 /* Similar to get_condition, only the resulting condition must be
4540 valid at JUMP, instead of at EARLIEST.
4542 This differs from noce_get_condition in ifcvt.c in that we prefer not to
4543 settle for the condition variable in the jump instruction being integral.
4544 We prefer to be able to record the value of a user variable, rather than
4545 the value of a temporary used in a condition. This could be solved by
4546 recording the value of *every* register scaned by canonicalize_condition,
4547 but this would require some code reorganization. */
4550 fis_get_condition (jump
)
4553 rtx cond
, set
, tmp
, insn
, earliest
;
4556 if (! any_condjump_p (jump
))
4559 set
= pc_set (jump
);
4560 cond
= XEXP (SET_SRC (set
), 0);
4562 /* If this branches to JUMP_LABEL when the condition is false,
4563 reverse the condition. */
4564 reverse
= (GET_CODE (XEXP (SET_SRC (set
), 2)) == LABEL_REF
4565 && XEXP (XEXP (SET_SRC (set
), 2), 0) == JUMP_LABEL (jump
));
4567 /* Use canonicalize_condition to do the dirty work of manipulating
4568 MODE_CC values and COMPARE rtx codes. */
4569 tmp
= canonicalize_condition (jump
, cond
, reverse
, &earliest
, NULL_RTX
);
4573 /* Verify that the given condition is valid at JUMP by virtue of not
4574 having been modified since EARLIEST. */
4575 for (insn
= earliest
; insn
!= jump
; insn
= NEXT_INSN (insn
))
4576 if (INSN_P (insn
) && modified_in_p (tmp
, insn
))
4581 /* The condition was modified. See if we can get a partial result
4582 that doesn't follow all the reversals. Perhaps combine can fold
4583 them together later. */
4584 tmp
= XEXP (tmp
, 0);
4585 if (!REG_P (tmp
) || GET_MODE_CLASS (GET_MODE (tmp
)) != MODE_INT
)
4587 tmp
= canonicalize_condition (jump
, cond
, reverse
, &earliest
, tmp
);
4591 /* For sanity's sake, re-validate the new result. */
4592 for (insn
= earliest
; insn
!= jump
; insn
= NEXT_INSN (insn
))
4593 if (INSN_P (insn
) && modified_in_p (tmp
, insn
))
4599 /* Find the implicit sets of a function. An "implicit set" is a constraint
4600 on the value of a variable, implied by a conditional jump. For example,
4601 following "if (x == 2)", the then branch may be optimized as though the
4602 conditional performed an "explicit set", in this example, "x = 2". This
4603 function records the set patterns that are implicit at the start of each
4607 find_implicit_sets ()
4609 basic_block bb
, dest
;
4615 /* Check for more than one sucessor. */
4616 if (bb
->succ
&& bb
->succ
->succ_next
)
4618 cond
= fis_get_condition (bb
->end
);
4621 && (GET_CODE (cond
) == EQ
|| GET_CODE (cond
) == NE
)
4622 && GET_CODE (XEXP (cond
, 0)) == REG
4623 && REGNO (XEXP (cond
, 0)) >= FIRST_PSEUDO_REGISTER
4624 && CONSTANT_P (XEXP (cond
, 1)))
4626 dest
= GET_CODE (cond
) == EQ
? BRANCH_EDGE (bb
)->dest
4627 : FALLTHRU_EDGE (bb
)->dest
;
4629 if (dest
&& ! dest
->pred
->pred_next
4630 && dest
!= EXIT_BLOCK_PTR
)
4632 new = gen_rtx_SET (VOIDmode
, XEXP (cond
, 0),
4634 implicit_sets
[dest
->index
] = new;
4637 fprintf(gcse_file
, "Implicit set of reg %d in ",
4638 REGNO (XEXP (cond
, 0)));
4639 fprintf(gcse_file
, "basic block %d\n", dest
->index
);
4647 fprintf (gcse_file
, "Found %d implicit sets\n", count
);
4650 /* Perform one copy/constant propagation pass.
4651 PASS is the pass count. If CPROP_JUMPS is true, perform constant
4652 propagation into conditional jumps. If BYPASS_JUMPS is true,
4653 perform conditional jump bypassing optimizations. */
4656 one_cprop_pass (pass
, cprop_jumps
, bypass_jumps
)
4663 const_prop_count
= 0;
4664 copy_prop_count
= 0;
4666 local_cprop_pass (cprop_jumps
);
4668 /* Determine implicit sets. */
4669 implicit_sets
= (rtx
*) xcalloc (last_basic_block
, sizeof (rtx
));
4670 find_implicit_sets ();
4672 alloc_hash_table (max_cuid
, &set_hash_table
, 1);
4673 compute_hash_table (&set_hash_table
);
4675 /* Free implicit_sets before peak usage. */
4676 free (implicit_sets
);
4677 implicit_sets
= NULL
;
4680 dump_hash_table (gcse_file
, "SET", &set_hash_table
);
4681 if (set_hash_table
.n_elems
> 0)
4683 alloc_cprop_mem (last_basic_block
, set_hash_table
.n_elems
);
4684 compute_cprop_data ();
4685 changed
= cprop (cprop_jumps
);
4687 changed
|= bypass_conditional_jumps ();
4691 free_hash_table (&set_hash_table
);
4695 fprintf (gcse_file
, "CPROP of %s, pass %d: %d bytes needed, ",
4696 current_function_name
, pass
, bytes_used
);
4697 fprintf (gcse_file
, "%d const props, %d copy props\n\n",
4698 const_prop_count
, copy_prop_count
);
4700 /* Global analysis may get into infinite loops for unreachable blocks. */
4701 if (changed
&& cprop_jumps
)
4702 delete_unreachable_blocks ();
4707 /* Bypass conditional jumps. */
4709 /* The value of last_basic_block at the beginning of the jump_bypass
4710 pass. The use of redirect_edge_and_branch_force may introduce new
4711 basic blocks, but the data flow analysis is only valid for basic
4712 block indices less than bypass_last_basic_block. */
4714 static int bypass_last_basic_block
;
4716 /* Find a set of REGNO to a constant that is available at the end of basic
4717 block BB. Returns NULL if no such set is found. Based heavily upon
4720 static struct expr
*
4721 find_bypass_set (regno
, bb
)
4725 struct expr
*result
= 0;
4730 struct expr
*set
= lookup_set (regno
, NULL_RTX
, &set_hash_table
);
4734 if (TEST_BIT (cprop_avout
[bb
], set
->bitmap_index
))
4736 set
= next_set (regno
, set
);
4742 if (GET_CODE (set
->expr
) != SET
)
4745 src
= SET_SRC (set
->expr
);
4746 if (CONSTANT_P (src
))
4749 if (GET_CODE (src
) != REG
)
4752 regno
= REGNO (src
);
4758 /* Subroutine of bypass_conditional_jumps that attempts to bypass the given
4759 basic block BB which has more than one predecessor. If not NULL, SETCC
4760 is the first instruction of BB, which is immediately followed by JUMP_INSN
4761 JUMP. Otherwise, SETCC is NULL, and JUMP is the first insn of BB.
4762 Returns nonzero if a change was made. */
4765 bypass_block (bb
, setcc
, jump
)
4773 insn
= (setcc
!= NULL
) ? setcc
: jump
;
4775 /* Determine set of register uses in INSN. */
4777 note_uses (&PATTERN (insn
), find_used_regs
, NULL
);
4778 note
= find_reg_equal_equiv_note (insn
);
4780 find_used_regs (&XEXP (note
, 0), NULL
);
4783 for (e
= bb
->pred
; e
; e
= enext
)
4785 enext
= e
->pred_next
;
4786 if (e
->flags
& EDGE_COMPLEX
)
4789 /* We can't redirect edges from new basic blocks. */
4790 if (e
->src
->index
>= bypass_last_basic_block
)
4793 for (i
= 0; i
< reg_use_count
; i
++)
4795 struct reg_use
*reg_used
= ®_use_table
[i
];
4796 unsigned int regno
= REGNO (reg_used
->reg_rtx
);
4797 basic_block dest
, old_dest
;
4801 if (regno
>= max_gcse_regno
)
4804 set
= find_bypass_set (regno
, e
->src
->index
);
4809 src
= SET_SRC (pc_set (jump
));
4812 src
= simplify_replace_rtx (src
,
4813 SET_DEST (PATTERN (setcc
)),
4814 SET_SRC (PATTERN (setcc
)));
4816 new = simplify_replace_rtx (src
, reg_used
->reg_rtx
,
4817 SET_SRC (set
->expr
));
4820 dest
= FALLTHRU_EDGE (bb
)->dest
;
4821 else if (GET_CODE (new) == LABEL_REF
)
4822 dest
= BRANCH_EDGE (bb
)->dest
;
4829 && dest
!= EXIT_BLOCK_PTR
)
4831 redirect_edge_and_branch_force (e
, dest
);
4833 /* Copy the register setter to the redirected edge.
4834 Don't copy CC0 setters, as CC0 is dead after jump. */
4837 rtx pat
= PATTERN (setcc
);
4838 if (!CC0_P (SET_DEST (pat
)))
4839 insert_insn_on_edge (copy_insn (pat
), e
);
4842 if (gcse_file
!= NULL
)
4844 fprintf (gcse_file
, "JUMP-BYPASS: Proved reg %d in jump_insn %d equals constant ",
4845 regno
, INSN_UID (jump
));
4846 print_rtl (gcse_file
, SET_SRC (set
->expr
));
4847 fprintf (gcse_file
, "\nBypass edge from %d->%d to %d\n",
4848 e
->src
->index
, old_dest
->index
, dest
->index
);
4858 /* Find basic blocks with more than one predecessor that only contain a
4859 single conditional jump. If the result of the comparison is known at
4860 compile-time from any incoming edge, redirect that edge to the
4861 appropriate target. Returns nonzero if a change was made. */
4864 bypass_conditional_jumps ()
4872 /* Note we start at block 1. */
4873 if (ENTRY_BLOCK_PTR
->next_bb
== EXIT_BLOCK_PTR
)
4876 bypass_last_basic_block
= last_basic_block
;
4879 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
->next_bb
->next_bb
,
4880 EXIT_BLOCK_PTR
, next_bb
)
4882 /* Check for more than one predecessor. */
4883 if (bb
->pred
&& bb
->pred
->pred_next
)
4886 for (insn
= bb
->head
;
4887 insn
!= NULL
&& insn
!= NEXT_INSN (bb
->end
);
4888 insn
= NEXT_INSN (insn
))
4889 if (GET_CODE (insn
) == INSN
)
4893 if (GET_CODE (PATTERN (insn
)) != SET
)
4896 dest
= SET_DEST (PATTERN (insn
));
4897 if (REG_P (dest
) || CC0_P (dest
))
4902 else if (GET_CODE (insn
) == JUMP_INSN
)
4904 if (any_condjump_p (insn
) && onlyjump_p (insn
))
4905 changed
|= bypass_block (bb
, setcc
, insn
);
4908 else if (INSN_P (insn
))
4913 /* If we bypassed any register setting insns, we inserted a
4914 copy on the redirected edge. These need to be committed. */
4916 commit_edge_insertions();
4921 /* Compute PRE+LCM working variables. */
4923 /* Local properties of expressions. */
4924 /* Nonzero for expressions that are transparent in the block. */
4925 static sbitmap
*transp
;
4927 /* Nonzero for expressions that are transparent at the end of the block.
4928 This is only zero for expressions killed by abnormal critical edge
4929 created by a calls. */
4930 static sbitmap
*transpout
;
4932 /* Nonzero for expressions that are computed (available) in the block. */
4933 static sbitmap
*comp
;
4935 /* Nonzero for expressions that are locally anticipatable in the block. */
4936 static sbitmap
*antloc
;
4938 /* Nonzero for expressions where this block is an optimal computation
4940 static sbitmap
*pre_optimal
;
4942 /* Nonzero for expressions which are redundant in a particular block. */
4943 static sbitmap
*pre_redundant
;
4945 /* Nonzero for expressions which should be inserted on a specific edge. */
4946 static sbitmap
*pre_insert_map
;
4948 /* Nonzero for expressions which should be deleted in a specific block. */
4949 static sbitmap
*pre_delete_map
;
4951 /* Contains the edge_list returned by pre_edge_lcm. */
4952 static struct edge_list
*edge_list
;
4954 /* Redundant insns. */
4955 static sbitmap pre_redundant_insns
;
4957 /* Allocate vars used for PRE analysis. */
4960 alloc_pre_mem (n_blocks
, n_exprs
)
4961 int n_blocks
, n_exprs
;
4963 transp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4964 comp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4965 antloc
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4968 pre_redundant
= NULL
;
4969 pre_insert_map
= NULL
;
4970 pre_delete_map
= NULL
;
4973 ae_kill
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
4975 /* pre_insert and pre_delete are allocated later. */
4978 /* Free vars used for PRE analysis. */
4983 sbitmap_vector_free (transp
);
4984 sbitmap_vector_free (comp
);
4986 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
4989 sbitmap_vector_free (pre_optimal
);
4991 sbitmap_vector_free (pre_redundant
);
4993 sbitmap_vector_free (pre_insert_map
);
4995 sbitmap_vector_free (pre_delete_map
);
4997 sbitmap_vector_free (ae_in
);
4999 sbitmap_vector_free (ae_out
);
5001 transp
= comp
= NULL
;
5002 pre_optimal
= pre_redundant
= pre_insert_map
= pre_delete_map
= NULL
;
5003 ae_in
= ae_out
= NULL
;
5006 /* Top level routine to do the dataflow analysis needed by PRE. */
5011 sbitmap trapping_expr
;
5015 compute_local_properties (transp
, comp
, antloc
, &expr_hash_table
);
5016 sbitmap_vector_zero (ae_kill
, last_basic_block
);
5018 /* Collect expressions which might trap. */
5019 trapping_expr
= sbitmap_alloc (expr_hash_table
.n_elems
);
5020 sbitmap_zero (trapping_expr
);
5021 for (ui
= 0; ui
< expr_hash_table
.size
; ui
++)
5024 for (e
= expr_hash_table
.table
[ui
]; e
!= NULL
; e
= e
->next_same_hash
)
5025 if (may_trap_p (e
->expr
))
5026 SET_BIT (trapping_expr
, e
->bitmap_index
);
5029 /* Compute ae_kill for each basic block using:
5033 This is significantly faster than compute_ae_kill. */
5039 /* If the current block is the destination of an abnormal edge, we
5040 kill all trapping expressions because we won't be able to properly
5041 place the instruction on the edge. So make them neither
5042 anticipatable nor transparent. This is fairly conservative. */
5043 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
5044 if (e
->flags
& EDGE_ABNORMAL
)
5046 sbitmap_difference (antloc
[bb
->index
], antloc
[bb
->index
], trapping_expr
);
5047 sbitmap_difference (transp
[bb
->index
], transp
[bb
->index
], trapping_expr
);
5051 sbitmap_a_or_b (ae_kill
[bb
->index
], transp
[bb
->index
], comp
[bb
->index
]);
5052 sbitmap_not (ae_kill
[bb
->index
], ae_kill
[bb
->index
]);
5055 edge_list
= pre_edge_lcm (gcse_file
, expr_hash_table
.n_elems
, transp
, comp
, antloc
,
5056 ae_kill
, &pre_insert_map
, &pre_delete_map
);
5057 sbitmap_vector_free (antloc
);
5059 sbitmap_vector_free (ae_kill
);
5061 sbitmap_free (trapping_expr
);
5066 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
5069 VISITED is a pointer to a working buffer for tracking which BB's have
5070 been visited. It is NULL for the top-level call.
5072 We treat reaching expressions that go through blocks containing the same
5073 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
5074 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
5075 2 as not reaching. The intent is to improve the probability of finding
5076 only one reaching expression and to reduce register lifetimes by picking
5077 the closest such expression. */
5080 pre_expr_reaches_here_p_work (occr_bb
, expr
, bb
, visited
)
5081 basic_block occr_bb
;
5088 for (pred
= bb
->pred
; pred
!= NULL
; pred
= pred
->pred_next
)
5090 basic_block pred_bb
= pred
->src
;
5092 if (pred
->src
== ENTRY_BLOCK_PTR
5093 /* Has predecessor has already been visited? */
5094 || visited
[pred_bb
->index
])
5095 ;/* Nothing to do. */
5097 /* Does this predecessor generate this expression? */
5098 else if (TEST_BIT (comp
[pred_bb
->index
], expr
->bitmap_index
))
5100 /* Is this the occurrence we're looking for?
5101 Note that there's only one generating occurrence per block
5102 so we just need to check the block number. */
5103 if (occr_bb
== pred_bb
)
5106 visited
[pred_bb
->index
] = 1;
5108 /* Ignore this predecessor if it kills the expression. */
5109 else if (! TEST_BIT (transp
[pred_bb
->index
], expr
->bitmap_index
))
5110 visited
[pred_bb
->index
] = 1;
5112 /* Neither gen nor kill. */
5115 visited
[pred_bb
->index
] = 1;
5116 if (pre_expr_reaches_here_p_work (occr_bb
, expr
, pred_bb
, visited
))
5121 /* All paths have been checked. */
5125 /* The wrapper for pre_expr_reaches_here_work that ensures that any
5126 memory allocated for that function is returned. */
5129 pre_expr_reaches_here_p (occr_bb
, expr
, bb
)
5130 basic_block occr_bb
;
5135 char *visited
= (char *) xcalloc (last_basic_block
, 1);
5137 rval
= pre_expr_reaches_here_p_work (occr_bb
, expr
, bb
, visited
);
5144 /* Given an expr, generate RTL which we can insert at the end of a BB,
5145 or on an edge. Set the block number of any insns generated to
5149 process_insert_insn (expr
)
5152 rtx reg
= expr
->reaching_reg
;
5153 rtx exp
= copy_rtx (expr
->expr
);
5158 /* If the expression is something that's an operand, like a constant,
5159 just copy it to a register. */
5160 if (general_operand (exp
, GET_MODE (reg
)))
5161 emit_move_insn (reg
, exp
);
5163 /* Otherwise, make a new insn to compute this expression and make sure the
5164 insn will be recognized (this also adds any needed CLOBBERs). Copy the
5165 expression to make sure we don't have any sharing issues. */
5166 else if (insn_invalid_p (emit_insn (gen_rtx_SET (VOIDmode
, reg
, exp
))))
5175 /* Add EXPR to the end of basic block BB.
5177 This is used by both the PRE and code hoisting.
5179 For PRE, we want to verify that the expr is either transparent
5180 or locally anticipatable in the target block. This check makes
5181 no sense for code hoisting. */
5184 insert_insn_end_bb (expr
, bb
, pre
)
5191 rtx reg
= expr
->reaching_reg
;
5192 int regno
= REGNO (reg
);
5195 pat
= process_insert_insn (expr
);
5196 if (pat
== NULL_RTX
|| ! INSN_P (pat
))
5200 while (NEXT_INSN (pat_end
) != NULL_RTX
)
5201 pat_end
= NEXT_INSN (pat_end
);
5203 /* If the last insn is a jump, insert EXPR in front [taking care to
5204 handle cc0, etc. properly]. Similary we need to care trapping
5205 instructions in presence of non-call exceptions. */
5207 if (GET_CODE (insn
) == JUMP_INSN
5208 || (GET_CODE (insn
) == INSN
5209 && (bb
->succ
->succ_next
|| (bb
->succ
->flags
& EDGE_ABNORMAL
))))
5214 /* It should always be the case that we can put these instructions
5215 anywhere in the basic block with performing PRE optimizations.
5217 if (GET_CODE (insn
) == INSN
&& pre
5218 && !TEST_BIT (antloc
[bb
->index
], expr
->bitmap_index
)
5219 && !TEST_BIT (transp
[bb
->index
], expr
->bitmap_index
))
5222 /* If this is a jump table, then we can't insert stuff here. Since
5223 we know the previous real insn must be the tablejump, we insert
5224 the new instruction just before the tablejump. */
5225 if (GET_CODE (PATTERN (insn
)) == ADDR_VEC
5226 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
)
5227 insn
= prev_real_insn (insn
);
5230 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
5231 if cc0 isn't set. */
5232 note
= find_reg_note (insn
, REG_CC_SETTER
, NULL_RTX
);
5234 insn
= XEXP (note
, 0);
5237 rtx maybe_cc0_setter
= prev_nonnote_insn (insn
);
5238 if (maybe_cc0_setter
5239 && INSN_P (maybe_cc0_setter
)
5240 && sets_cc0_p (PATTERN (maybe_cc0_setter
)))
5241 insn
= maybe_cc0_setter
;
5244 /* FIXME: What if something in cc0/jump uses value set in new insn? */
5245 new_insn
= emit_insn_before (pat
, insn
);
5248 /* Likewise if the last insn is a call, as will happen in the presence
5249 of exception handling. */
5250 else if (GET_CODE (insn
) == CALL_INSN
5251 && (bb
->succ
->succ_next
|| (bb
->succ
->flags
& EDGE_ABNORMAL
)))
5253 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
5254 we search backward and place the instructions before the first
5255 parameter is loaded. Do this for everyone for consistency and a
5256 presumption that we'll get better code elsewhere as well.
5258 It should always be the case that we can put these instructions
5259 anywhere in the basic block with performing PRE optimizations.
5263 && !TEST_BIT (antloc
[bb
->index
], expr
->bitmap_index
)
5264 && !TEST_BIT (transp
[bb
->index
], expr
->bitmap_index
))
5267 /* Since different machines initialize their parameter registers
5268 in different orders, assume nothing. Collect the set of all
5269 parameter registers. */
5270 insn
= find_first_parameter_load (insn
, bb
->head
);
5272 /* If we found all the parameter loads, then we want to insert
5273 before the first parameter load.
5275 If we did not find all the parameter loads, then we might have
5276 stopped on the head of the block, which could be a CODE_LABEL.
5277 If we inserted before the CODE_LABEL, then we would be putting
5278 the insn in the wrong basic block. In that case, put the insn
5279 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
5280 while (GET_CODE (insn
) == CODE_LABEL
5281 || NOTE_INSN_BASIC_BLOCK_P (insn
))
5282 insn
= NEXT_INSN (insn
);
5284 new_insn
= emit_insn_before (pat
, insn
);
5287 new_insn
= emit_insn_after (pat
, insn
);
5293 add_label_notes (PATTERN (pat
), new_insn
);
5294 note_stores (PATTERN (pat
), record_set_info
, pat
);
5298 pat
= NEXT_INSN (pat
);
5301 gcse_create_count
++;
5305 fprintf (gcse_file
, "PRE/HOIST: end of bb %d, insn %d, ",
5306 bb
->index
, INSN_UID (new_insn
));
5307 fprintf (gcse_file
, "copying expression %d to reg %d\n",
5308 expr
->bitmap_index
, regno
);
5312 /* Insert partially redundant expressions on edges in the CFG to make
5313 the expressions fully redundant. */
5316 pre_edge_insert (edge_list
, index_map
)
5317 struct edge_list
*edge_list
;
5318 struct expr
**index_map
;
5320 int e
, i
, j
, num_edges
, set_size
, did_insert
= 0;
5323 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
5324 if it reaches any of the deleted expressions. */
5326 set_size
= pre_insert_map
[0]->size
;
5327 num_edges
= NUM_EDGES (edge_list
);
5328 inserted
= sbitmap_vector_alloc (num_edges
, expr_hash_table
.n_elems
);
5329 sbitmap_vector_zero (inserted
, num_edges
);
5331 for (e
= 0; e
< num_edges
; e
++)
5334 basic_block bb
= INDEX_EDGE_PRED_BB (edge_list
, e
);
5336 for (i
= indx
= 0; i
< set_size
; i
++, indx
+= SBITMAP_ELT_BITS
)
5338 SBITMAP_ELT_TYPE insert
= pre_insert_map
[e
]->elms
[i
];
5340 for (j
= indx
; insert
&& j
< (int) expr_hash_table
.n_elems
; j
++, insert
>>= 1)
5341 if ((insert
& 1) != 0 && index_map
[j
]->reaching_reg
!= NULL_RTX
)
5343 struct expr
*expr
= index_map
[j
];
5346 /* Now look at each deleted occurrence of this expression. */
5347 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
5349 if (! occr
->deleted_p
)
5352 /* Insert this expression on this edge if if it would
5353 reach the deleted occurrence in BB. */
5354 if (!TEST_BIT (inserted
[e
], j
))
5357 edge eg
= INDEX_EDGE (edge_list
, e
);
5359 /* We can't insert anything on an abnormal and
5360 critical edge, so we insert the insn at the end of
5361 the previous block. There are several alternatives
5362 detailed in Morgans book P277 (sec 10.5) for
5363 handling this situation. This one is easiest for
5366 if ((eg
->flags
& EDGE_ABNORMAL
) == EDGE_ABNORMAL
)
5367 insert_insn_end_bb (index_map
[j
], bb
, 0);
5370 insn
= process_insert_insn (index_map
[j
]);
5371 insert_insn_on_edge (insn
, eg
);
5376 fprintf (gcse_file
, "PRE/HOIST: edge (%d,%d), ",
5378 INDEX_EDGE_SUCC_BB (edge_list
, e
)->index
);
5379 fprintf (gcse_file
, "copy expression %d\n",
5380 expr
->bitmap_index
);
5383 update_ld_motion_stores (expr
);
5384 SET_BIT (inserted
[e
], j
);
5386 gcse_create_count
++;
5393 sbitmap_vector_free (inserted
);
5397 /* Copy the result of INSN to REG. INDX is the expression number. */
5400 pre_insert_copy_insn (expr
, insn
)
5404 rtx reg
= expr
->reaching_reg
;
5405 int regno
= REGNO (reg
);
5406 int indx
= expr
->bitmap_index
;
5407 rtx set
= single_set (insn
);
5413 new_insn
= emit_insn_after (gen_move_insn (reg
, SET_DEST (set
)), insn
);
5415 /* Keep register set table up to date. */
5416 record_one_set (regno
, new_insn
);
5418 gcse_create_count
++;
5422 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
5423 BLOCK_NUM (insn
), INSN_UID (new_insn
), indx
,
5424 INSN_UID (insn
), regno
);
5425 update_ld_motion_stores (expr
);
5428 /* Copy available expressions that reach the redundant expression
5429 to `reaching_reg'. */
5432 pre_insert_copies ()
5439 /* For each available expression in the table, copy the result to
5440 `reaching_reg' if the expression reaches a deleted one.
5442 ??? The current algorithm is rather brute force.
5443 Need to do some profiling. */
5445 for (i
= 0; i
< expr_hash_table
.size
; i
++)
5446 for (expr
= expr_hash_table
.table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
5448 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
5449 we don't want to insert a copy here because the expression may not
5450 really be redundant. So only insert an insn if the expression was
5451 deleted. This test also avoids further processing if the
5452 expression wasn't deleted anywhere. */
5453 if (expr
->reaching_reg
== NULL
)
5456 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
5458 if (! occr
->deleted_p
)
5461 for (avail
= expr
->avail_occr
; avail
!= NULL
; avail
= avail
->next
)
5463 rtx insn
= avail
->insn
;
5465 /* No need to handle this one if handled already. */
5466 if (avail
->copied_p
)
5469 /* Don't handle this one if it's a redundant one. */
5470 if (TEST_BIT (pre_redundant_insns
, INSN_CUID (insn
)))
5473 /* Or if the expression doesn't reach the deleted one. */
5474 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail
->insn
),
5476 BLOCK_FOR_INSN (occr
->insn
)))
5479 /* Copy the result of avail to reaching_reg. */
5480 pre_insert_copy_insn (expr
, insn
);
5481 avail
->copied_p
= 1;
5487 /* Emit move from SRC to DEST noting the equivalence with expression computed
5490 gcse_emit_move_after (src
, dest
, insn
)
5491 rtx src
, dest
, insn
;
5494 rtx set
= single_set (insn
), set2
;
5498 /* This should never fail since we're creating a reg->reg copy
5499 we've verified to be valid. */
5501 new = emit_insn_after (gen_move_insn (dest
, src
), insn
);
5503 /* Note the equivalence for local CSE pass. */
5504 set2
= single_set (new);
5505 if (!set2
|| !rtx_equal_p (SET_DEST (set2
), dest
))
5507 if ((note
= find_reg_equal_equiv_note (insn
)))
5508 eqv
= XEXP (note
, 0);
5510 eqv
= SET_SRC (set
);
5512 set_unique_reg_note (new, REG_EQUAL
, copy_insn_1 (eqv
));
5517 /* Delete redundant computations.
5518 Deletion is done by changing the insn to copy the `reaching_reg' of
5519 the expression into the result of the SET. It is left to later passes
5520 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
5522 Returns nonzero if a change is made. */
5533 for (i
= 0; i
< expr_hash_table
.size
; i
++)
5534 for (expr
= expr_hash_table
.table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
5536 int indx
= expr
->bitmap_index
;
5538 /* We only need to search antic_occr since we require
5541 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
5543 rtx insn
= occr
->insn
;
5545 basic_block bb
= BLOCK_FOR_INSN (insn
);
5547 if (TEST_BIT (pre_delete_map
[bb
->index
], indx
))
5549 set
= single_set (insn
);
5553 /* Create a pseudo-reg to store the result of reaching
5554 expressions into. Get the mode for the new pseudo from
5555 the mode of the original destination pseudo. */
5556 if (expr
->reaching_reg
== NULL
)
5558 = gen_reg_rtx (GET_MODE (SET_DEST (set
)));
5560 gcse_emit_move_after (expr
->reaching_reg
, SET_DEST (set
), insn
);
5562 occr
->deleted_p
= 1;
5563 SET_BIT (pre_redundant_insns
, INSN_CUID (insn
));
5570 "PRE: redundant insn %d (expression %d) in ",
5571 INSN_UID (insn
), indx
);
5572 fprintf (gcse_file
, "bb %d, reaching reg is %d\n",
5573 bb
->index
, REGNO (expr
->reaching_reg
));
5582 /* Perform GCSE optimizations using PRE.
5583 This is called by one_pre_gcse_pass after all the dataflow analysis
5586 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
5587 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
5588 Compiler Design and Implementation.
5590 ??? A new pseudo reg is created to hold the reaching expression. The nice
5591 thing about the classical approach is that it would try to use an existing
5592 reg. If the register can't be adequately optimized [i.e. we introduce
5593 reload problems], one could add a pass here to propagate the new register
5596 ??? We don't handle single sets in PARALLELs because we're [currently] not
5597 able to copy the rest of the parallel when we insert copies to create full
5598 redundancies from partial redundancies. However, there's no reason why we
5599 can't handle PARALLELs in the cases where there are no partial
5606 int did_insert
, changed
;
5607 struct expr
**index_map
;
5610 /* Compute a mapping from expression number (`bitmap_index') to
5611 hash table entry. */
5613 index_map
= (struct expr
**) xcalloc (expr_hash_table
.n_elems
, sizeof (struct expr
*));
5614 for (i
= 0; i
< expr_hash_table
.size
; i
++)
5615 for (expr
= expr_hash_table
.table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
5616 index_map
[expr
->bitmap_index
] = expr
;
5618 /* Reset bitmap used to track which insns are redundant. */
5619 pre_redundant_insns
= sbitmap_alloc (max_cuid
);
5620 sbitmap_zero (pre_redundant_insns
);
5622 /* Delete the redundant insns first so that
5623 - we know what register to use for the new insns and for the other
5624 ones with reaching expressions
5625 - we know which insns are redundant when we go to create copies */
5627 changed
= pre_delete ();
5629 did_insert
= pre_edge_insert (edge_list
, index_map
);
5631 /* In other places with reaching expressions, copy the expression to the
5632 specially allocated pseudo-reg that reaches the redundant expr. */
5633 pre_insert_copies ();
5636 commit_edge_insertions ();
5641 sbitmap_free (pre_redundant_insns
);
5645 /* Top level routine to perform one PRE GCSE pass.
5647 Return nonzero if a change was made. */
5650 one_pre_gcse_pass (pass
)
5655 gcse_subst_count
= 0;
5656 gcse_create_count
= 0;
5658 alloc_hash_table (max_cuid
, &expr_hash_table
, 0);
5659 add_noreturn_fake_exit_edges ();
5661 compute_ld_motion_mems ();
5663 compute_hash_table (&expr_hash_table
);
5664 trim_ld_motion_mems ();
5666 dump_hash_table (gcse_file
, "Expression", &expr_hash_table
);
5668 if (expr_hash_table
.n_elems
> 0)
5670 alloc_pre_mem (last_basic_block
, expr_hash_table
.n_elems
);
5671 compute_pre_data ();
5672 changed
|= pre_gcse ();
5673 free_edge_list (edge_list
);
5678 remove_fake_edges ();
5679 free_hash_table (&expr_hash_table
);
5683 fprintf (gcse_file
, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
5684 current_function_name
, pass
, bytes_used
);
5685 fprintf (gcse_file
, "%d substs, %d insns created\n",
5686 gcse_subst_count
, gcse_create_count
);
5692 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
5693 If notes are added to an insn which references a CODE_LABEL, the
5694 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
5695 because the following loop optimization pass requires them. */
5697 /* ??? This is very similar to the loop.c add_label_notes function. We
5698 could probably share code here. */
5700 /* ??? If there was a jump optimization pass after gcse and before loop,
5701 then we would not need to do this here, because jump would add the
5702 necessary REG_LABEL notes. */
5705 add_label_notes (x
, insn
)
5709 enum rtx_code code
= GET_CODE (x
);
5713 if (code
== LABEL_REF
&& !LABEL_REF_NONLOCAL_P (x
))
5715 /* This code used to ignore labels that referred to dispatch tables to
5716 avoid flow generating (slighly) worse code.
5718 We no longer ignore such label references (see LABEL_REF handling in
5719 mark_jump_label for additional information). */
5721 REG_NOTES (insn
) = gen_rtx_INSN_LIST (REG_LABEL
, XEXP (x
, 0),
5723 if (LABEL_P (XEXP (x
, 0)))
5724 LABEL_NUSES (XEXP (x
, 0))++;
5728 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
5731 add_label_notes (XEXP (x
, i
), insn
);
5732 else if (fmt
[i
] == 'E')
5733 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
5734 add_label_notes (XVECEXP (x
, i
, j
), insn
);
5738 /* Compute transparent outgoing information for each block.
5740 An expression is transparent to an edge unless it is killed by
5741 the edge itself. This can only happen with abnormal control flow,
5742 when the edge is traversed through a call. This happens with
5743 non-local labels and exceptions.
5745 This would not be necessary if we split the edge. While this is
5746 normally impossible for abnormal critical edges, with some effort
5747 it should be possible with exception handling, since we still have
5748 control over which handler should be invoked. But due to increased
5749 EH table sizes, this may not be worthwhile. */
5752 compute_transpout ()
5758 sbitmap_vector_ones (transpout
, last_basic_block
);
5762 /* Note that flow inserted a nop a the end of basic blocks that
5763 end in call instructions for reasons other than abnormal
5765 if (GET_CODE (bb
->end
) != CALL_INSN
)
5768 for (i
= 0; i
< expr_hash_table
.size
; i
++)
5769 for (expr
= expr_hash_table
.table
[i
]; expr
; expr
= expr
->next_same_hash
)
5770 if (GET_CODE (expr
->expr
) == MEM
)
5772 if (GET_CODE (XEXP (expr
->expr
, 0)) == SYMBOL_REF
5773 && CONSTANT_POOL_ADDRESS_P (XEXP (expr
->expr
, 0)))
5776 /* ??? Optimally, we would use interprocedural alias
5777 analysis to determine if this mem is actually killed
5779 RESET_BIT (transpout
[bb
->index
], expr
->bitmap_index
);
5784 /* Removal of useless null pointer checks */
5786 /* Called via note_stores. X is set by SETTER. If X is a register we must
5787 invalidate nonnull_local and set nonnull_killed. DATA is really a
5788 `null_pointer_info *'.
5790 We ignore hard registers. */
5793 invalidate_nonnull_info (x
, setter
, data
)
5795 rtx setter ATTRIBUTE_UNUSED
;
5799 struct null_pointer_info
*npi
= (struct null_pointer_info
*) data
;
5801 while (GET_CODE (x
) == SUBREG
)
5804 /* Ignore anything that is not a register or is a hard register. */
5805 if (GET_CODE (x
) != REG
5806 || REGNO (x
) < npi
->min_reg
5807 || REGNO (x
) >= npi
->max_reg
)
5810 regno
= REGNO (x
) - npi
->min_reg
;
5812 RESET_BIT (npi
->nonnull_local
[npi
->current_block
->index
], regno
);
5813 SET_BIT (npi
->nonnull_killed
[npi
->current_block
->index
], regno
);
5816 /* Do null-pointer check elimination for the registers indicated in
5817 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5818 they are not our responsibility to free. */
5821 delete_null_pointer_checks_1 (block_reg
, nonnull_avin
,
5823 unsigned int *block_reg
;
5824 sbitmap
*nonnull_avin
;
5825 sbitmap
*nonnull_avout
;
5826 struct null_pointer_info
*npi
;
5828 basic_block bb
, current_block
;
5829 sbitmap
*nonnull_local
= npi
->nonnull_local
;
5830 sbitmap
*nonnull_killed
= npi
->nonnull_killed
;
5831 int something_changed
= 0;
5833 /* Compute local properties, nonnull and killed. A register will have
5834 the nonnull property if at the end of the current block its value is
5835 known to be nonnull. The killed property indicates that somewhere in
5836 the block any information we had about the register is killed.
5838 Note that a register can have both properties in a single block. That
5839 indicates that it's killed, then later in the block a new value is
5841 sbitmap_vector_zero (nonnull_local
, last_basic_block
);
5842 sbitmap_vector_zero (nonnull_killed
, last_basic_block
);
5844 FOR_EACH_BB (current_block
)
5846 rtx insn
, stop_insn
;
5848 /* Set the current block for invalidate_nonnull_info. */
5849 npi
->current_block
= current_block
;
5851 /* Scan each insn in the basic block looking for memory references and
5853 stop_insn
= NEXT_INSN (current_block
->end
);
5854 for (insn
= current_block
->head
;
5856 insn
= NEXT_INSN (insn
))
5861 /* Ignore anything that is not a normal insn. */
5862 if (! INSN_P (insn
))
5865 /* Basically ignore anything that is not a simple SET. We do have
5866 to make sure to invalidate nonnull_local and set nonnull_killed
5867 for such insns though. */
5868 set
= single_set (insn
);
5871 note_stores (PATTERN (insn
), invalidate_nonnull_info
, npi
);
5875 /* See if we've got a usable memory load. We handle it first
5876 in case it uses its address register as a dest (which kills
5877 the nonnull property). */
5878 if (GET_CODE (SET_SRC (set
)) == MEM
5879 && GET_CODE ((reg
= XEXP (SET_SRC (set
), 0))) == REG
5880 && REGNO (reg
) >= npi
->min_reg
5881 && REGNO (reg
) < npi
->max_reg
)
5882 SET_BIT (nonnull_local
[current_block
->index
],
5883 REGNO (reg
) - npi
->min_reg
);
5885 /* Now invalidate stuff clobbered by this insn. */
5886 note_stores (PATTERN (insn
), invalidate_nonnull_info
, npi
);
5888 /* And handle stores, we do these last since any sets in INSN can
5889 not kill the nonnull property if it is derived from a MEM
5890 appearing in a SET_DEST. */
5891 if (GET_CODE (SET_DEST (set
)) == MEM
5892 && GET_CODE ((reg
= XEXP (SET_DEST (set
), 0))) == REG
5893 && REGNO (reg
) >= npi
->min_reg
5894 && REGNO (reg
) < npi
->max_reg
)
5895 SET_BIT (nonnull_local
[current_block
->index
],
5896 REGNO (reg
) - npi
->min_reg
);
5900 /* Now compute global properties based on the local properties. This
5901 is a classic global availability algorithm. */
5902 compute_available (nonnull_local
, nonnull_killed
,
5903 nonnull_avout
, nonnull_avin
);
5905 /* Now look at each bb and see if it ends with a compare of a value
5909 rtx last_insn
= bb
->end
;
5910 rtx condition
, earliest
;
5911 int compare_and_branch
;
5913 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5914 since BLOCK_REG[BB] is zero if this block did not end with a
5915 comparison against zero, this condition works. */
5916 if (block_reg
[bb
->index
] < npi
->min_reg
5917 || block_reg
[bb
->index
] >= npi
->max_reg
)
5920 /* LAST_INSN is a conditional jump. Get its condition. */
5921 condition
= get_condition (last_insn
, &earliest
);
5923 /* If we can't determine the condition then skip. */
5927 /* Is the register known to have a nonzero value? */
5928 if (!TEST_BIT (nonnull_avout
[bb
->index
], block_reg
[bb
->index
] - npi
->min_reg
))
5931 /* Try to compute whether the compare/branch at the loop end is one or
5932 two instructions. */
5933 if (earliest
== last_insn
)
5934 compare_and_branch
= 1;
5935 else if (earliest
== prev_nonnote_insn (last_insn
))
5936 compare_and_branch
= 2;
5940 /* We know the register in this comparison is nonnull at exit from
5941 this block. We can optimize this comparison. */
5942 if (GET_CODE (condition
) == NE
)
5946 new_jump
= emit_jump_insn_after (gen_jump (JUMP_LABEL (last_insn
)),
5948 JUMP_LABEL (new_jump
) = JUMP_LABEL (last_insn
);
5949 LABEL_NUSES (JUMP_LABEL (new_jump
))++;
5950 emit_barrier_after (new_jump
);
5953 something_changed
= 1;
5954 delete_insn (last_insn
);
5955 if (compare_and_branch
== 2)
5956 delete_insn (earliest
);
5957 purge_dead_edges (bb
);
5959 /* Don't check this block again. (Note that BLOCK_END is
5960 invalid here; we deleted the last instruction in the
5962 block_reg
[bb
->index
] = 0;
5965 return something_changed
;
5968 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5971 This is conceptually similar to global constant/copy propagation and
5972 classic global CSE (it even uses the same dataflow equations as cprop).
5974 If a register is used as memory address with the form (mem (reg)), then we
5975 know that REG can not be zero at that point in the program. Any instruction
5976 which sets REG "kills" this property.
5978 So, if every path leading to a conditional branch has an available memory
5979 reference of that form, then we know the register can not have the value
5980 zero at the conditional branch.
5982 So we merely need to compute the local properties and propagate that data
5983 around the cfg, then optimize where possible.
5985 We run this pass two times. Once before CSE, then again after CSE. This
5986 has proven to be the most profitable approach. It is rare for new
5987 optimization opportunities of this nature to appear after the first CSE
5990 This could probably be integrated with global cprop with a little work. */
5993 delete_null_pointer_checks (f
)
5994 rtx f ATTRIBUTE_UNUSED
;
5996 sbitmap
*nonnull_avin
, *nonnull_avout
;
5997 unsigned int *block_reg
;
6002 struct null_pointer_info npi
;
6003 int something_changed
= 0;
6005 /* If we have only a single block, then there's nothing to do. */
6006 if (n_basic_blocks
<= 1)
6009 /* Trying to perform global optimizations on flow graphs which have
6010 a high connectivity will take a long time and is unlikely to be
6011 particularly useful.
6013 In normal circumstances a cfg should have about twice as many edges
6014 as blocks. But we do not want to punish small functions which have
6015 a couple switch statements. So we require a relatively large number
6016 of basic blocks and the ratio of edges to blocks to be high. */
6017 if (n_basic_blocks
> 1000 && n_edges
/ n_basic_blocks
>= 20)
6020 /* We need four bitmaps, each with a bit for each register in each
6022 max_reg
= max_reg_num ();
6023 regs_per_pass
= get_bitmap_width (4, last_basic_block
, max_reg
);
6025 /* Allocate bitmaps to hold local and global properties. */
6026 npi
.nonnull_local
= sbitmap_vector_alloc (last_basic_block
, regs_per_pass
);
6027 npi
.nonnull_killed
= sbitmap_vector_alloc (last_basic_block
, regs_per_pass
);
6028 nonnull_avin
= sbitmap_vector_alloc (last_basic_block
, regs_per_pass
);
6029 nonnull_avout
= sbitmap_vector_alloc (last_basic_block
, regs_per_pass
);
6031 /* Go through the basic blocks, seeing whether or not each block
6032 ends with a conditional branch whose condition is a comparison
6033 against zero. Record the register compared in BLOCK_REG. */
6034 block_reg
= (unsigned int *) xcalloc (last_basic_block
, sizeof (int));
6037 rtx last_insn
= bb
->end
;
6038 rtx condition
, earliest
, reg
;
6040 /* We only want conditional branches. */
6041 if (GET_CODE (last_insn
) != JUMP_INSN
6042 || !any_condjump_p (last_insn
)
6043 || !onlyjump_p (last_insn
))
6046 /* LAST_INSN is a conditional jump. Get its condition. */
6047 condition
= get_condition (last_insn
, &earliest
);
6049 /* If we were unable to get the condition, or it is not an equality
6050 comparison against zero then there's nothing we can do. */
6052 || (GET_CODE (condition
) != NE
&& GET_CODE (condition
) != EQ
)
6053 || GET_CODE (XEXP (condition
, 1)) != CONST_INT
6054 || (XEXP (condition
, 1)
6055 != CONST0_RTX (GET_MODE (XEXP (condition
, 0)))))
6058 /* We must be checking a register against zero. */
6059 reg
= XEXP (condition
, 0);
6060 if (GET_CODE (reg
) != REG
)
6063 block_reg
[bb
->index
] = REGNO (reg
);
6066 /* Go through the algorithm for each block of registers. */
6067 for (reg
= FIRST_PSEUDO_REGISTER
; reg
< max_reg
; reg
+= regs_per_pass
)
6070 npi
.max_reg
= MIN (reg
+ regs_per_pass
, max_reg
);
6071 something_changed
|= delete_null_pointer_checks_1 (block_reg
,
6077 /* Free the table of registers compared at the end of every block. */
6081 sbitmap_vector_free (npi
.nonnull_local
);
6082 sbitmap_vector_free (npi
.nonnull_killed
);
6083 sbitmap_vector_free (nonnull_avin
);
6084 sbitmap_vector_free (nonnull_avout
);
6086 return something_changed
;
6089 /* Code Hoisting variables and subroutines. */
6091 /* Very busy expressions. */
6092 static sbitmap
*hoist_vbein
;
6093 static sbitmap
*hoist_vbeout
;
6095 /* Hoistable expressions. */
6096 static sbitmap
*hoist_exprs
;
6098 /* Dominator bitmaps. */
6099 dominance_info dominators
;
6101 /* ??? We could compute post dominators and run this algorithm in
6102 reverse to perform tail merging, doing so would probably be
6103 more effective than the tail merging code in jump.c.
6105 It's unclear if tail merging could be run in parallel with
6106 code hoisting. It would be nice. */
6108 /* Allocate vars used for code hoisting analysis. */
6111 alloc_code_hoist_mem (n_blocks
, n_exprs
)
6112 int n_blocks
, n_exprs
;
6114 antloc
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
6115 transp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
6116 comp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
6118 hoist_vbein
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
6119 hoist_vbeout
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
6120 hoist_exprs
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
6121 transpout
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
6124 /* Free vars used for code hoisting analysis. */
6127 free_code_hoist_mem ()
6129 sbitmap_vector_free (antloc
);
6130 sbitmap_vector_free (transp
);
6131 sbitmap_vector_free (comp
);
6133 sbitmap_vector_free (hoist_vbein
);
6134 sbitmap_vector_free (hoist_vbeout
);
6135 sbitmap_vector_free (hoist_exprs
);
6136 sbitmap_vector_free (transpout
);
6138 free_dominance_info (dominators
);
6141 /* Compute the very busy expressions at entry/exit from each block.
6143 An expression is very busy if all paths from a given point
6144 compute the expression. */
6147 compute_code_hoist_vbeinout ()
6149 int changed
, passes
;
6152 sbitmap_vector_zero (hoist_vbeout
, last_basic_block
);
6153 sbitmap_vector_zero (hoist_vbein
, last_basic_block
);
6162 /* We scan the blocks in the reverse order to speed up
6164 FOR_EACH_BB_REVERSE (bb
)
6166 changed
|= sbitmap_a_or_b_and_c_cg (hoist_vbein
[bb
->index
], antloc
[bb
->index
],
6167 hoist_vbeout
[bb
->index
], transp
[bb
->index
]);
6168 if (bb
->next_bb
!= EXIT_BLOCK_PTR
)
6169 sbitmap_intersection_of_succs (hoist_vbeout
[bb
->index
], hoist_vbein
, bb
->index
);
6176 fprintf (gcse_file
, "hoisting vbeinout computation: %d passes\n", passes
);
6179 /* Top level routine to do the dataflow analysis needed by code hoisting. */
6182 compute_code_hoist_data ()
6184 compute_local_properties (transp
, comp
, antloc
, &expr_hash_table
);
6185 compute_transpout ();
6186 compute_code_hoist_vbeinout ();
6187 dominators
= calculate_dominance_info (CDI_DOMINATORS
);
6189 fprintf (gcse_file
, "\n");
6192 /* Determine if the expression identified by EXPR_INDEX would
6193 reach BB unimpared if it was placed at the end of EXPR_BB.
6195 It's unclear exactly what Muchnick meant by "unimpared". It seems
6196 to me that the expression must either be computed or transparent in
6197 *every* block in the path(s) from EXPR_BB to BB. Any other definition
6198 would allow the expression to be hoisted out of loops, even if
6199 the expression wasn't a loop invariant.
6201 Contrast this to reachability for PRE where an expression is
6202 considered reachable if *any* path reaches instead of *all*
6206 hoist_expr_reaches_here_p (expr_bb
, expr_index
, bb
, visited
)
6207 basic_block expr_bb
;
6213 int visited_allocated_locally
= 0;
6216 if (visited
== NULL
)
6218 visited_allocated_locally
= 1;
6219 visited
= xcalloc (last_basic_block
, 1);
6222 for (pred
= bb
->pred
; pred
!= NULL
; pred
= pred
->pred_next
)
6224 basic_block pred_bb
= pred
->src
;
6226 if (pred
->src
== ENTRY_BLOCK_PTR
)
6228 else if (pred_bb
== expr_bb
)
6230 else if (visited
[pred_bb
->index
])
6233 /* Does this predecessor generate this expression? */
6234 else if (TEST_BIT (comp
[pred_bb
->index
], expr_index
))
6236 else if (! TEST_BIT (transp
[pred_bb
->index
], expr_index
))
6242 visited
[pred_bb
->index
] = 1;
6243 if (! hoist_expr_reaches_here_p (expr_bb
, expr_index
,
6248 if (visited_allocated_locally
)
6251 return (pred
== NULL
);
6254 /* Actually perform code hoisting. */
6259 basic_block bb
, dominated
;
6261 unsigned int domby_len
;
6263 struct expr
**index_map
;
6266 sbitmap_vector_zero (hoist_exprs
, last_basic_block
);
6268 /* Compute a mapping from expression number (`bitmap_index') to
6269 hash table entry. */
6271 index_map
= (struct expr
**) xcalloc (expr_hash_table
.n_elems
, sizeof (struct expr
*));
6272 for (i
= 0; i
< expr_hash_table
.size
; i
++)
6273 for (expr
= expr_hash_table
.table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
6274 index_map
[expr
->bitmap_index
] = expr
;
6276 /* Walk over each basic block looking for potentially hoistable
6277 expressions, nothing gets hoisted from the entry block. */
6281 int insn_inserted_p
;
6283 domby_len
= get_dominated_by (dominators
, bb
, &domby
);
6284 /* Examine each expression that is very busy at the exit of this
6285 block. These are the potentially hoistable expressions. */
6286 for (i
= 0; i
< hoist_vbeout
[bb
->index
]->n_bits
; i
++)
6290 if (TEST_BIT (hoist_vbeout
[bb
->index
], i
)
6291 && TEST_BIT (transpout
[bb
->index
], i
))
6293 /* We've found a potentially hoistable expression, now
6294 we look at every block BB dominates to see if it
6295 computes the expression. */
6296 for (j
= 0; j
< domby_len
; j
++)
6298 dominated
= domby
[j
];
6299 /* Ignore self dominance. */
6300 if (bb
== dominated
)
6302 /* We've found a dominated block, now see if it computes
6303 the busy expression and whether or not moving that
6304 expression to the "beginning" of that block is safe. */
6305 if (!TEST_BIT (antloc
[dominated
->index
], i
))
6308 /* Note if the expression would reach the dominated block
6309 unimpared if it was placed at the end of BB.
6311 Keep track of how many times this expression is hoistable
6312 from a dominated block into BB. */
6313 if (hoist_expr_reaches_here_p (bb
, i
, dominated
, NULL
))
6317 /* If we found more than one hoistable occurrence of this
6318 expression, then note it in the bitmap of expressions to
6319 hoist. It makes no sense to hoist things which are computed
6320 in only one BB, and doing so tends to pessimize register
6321 allocation. One could increase this value to try harder
6322 to avoid any possible code expansion due to register
6323 allocation issues; however experiments have shown that
6324 the vast majority of hoistable expressions are only movable
6325 from two successors, so raising this threshhold is likely
6326 to nullify any benefit we get from code hoisting. */
6329 SET_BIT (hoist_exprs
[bb
->index
], i
);
6334 /* If we found nothing to hoist, then quit now. */
6341 /* Loop over all the hoistable expressions. */
6342 for (i
= 0; i
< hoist_exprs
[bb
->index
]->n_bits
; i
++)
6344 /* We want to insert the expression into BB only once, so
6345 note when we've inserted it. */
6346 insn_inserted_p
= 0;
6348 /* These tests should be the same as the tests above. */
6349 if (TEST_BIT (hoist_vbeout
[bb
->index
], i
))
6351 /* We've found a potentially hoistable expression, now
6352 we look at every block BB dominates to see if it
6353 computes the expression. */
6354 for (j
= 0; j
< domby_len
; j
++)
6356 dominated
= domby
[j
];
6357 /* Ignore self dominance. */
6358 if (bb
== dominated
)
6361 /* We've found a dominated block, now see if it computes
6362 the busy expression and whether or not moving that
6363 expression to the "beginning" of that block is safe. */
6364 if (!TEST_BIT (antloc
[dominated
->index
], i
))
6367 /* The expression is computed in the dominated block and
6368 it would be safe to compute it at the start of the
6369 dominated block. Now we have to determine if the
6370 expression would reach the dominated block if it was
6371 placed at the end of BB. */
6372 if (hoist_expr_reaches_here_p (bb
, i
, dominated
, NULL
))
6374 struct expr
*expr
= index_map
[i
];
6375 struct occr
*occr
= expr
->antic_occr
;
6379 /* Find the right occurrence of this expression. */
6380 while (BLOCK_FOR_INSN (occr
->insn
) != dominated
&& occr
)
6383 /* Should never happen. */
6389 set
= single_set (insn
);
6393 /* Create a pseudo-reg to store the result of reaching
6394 expressions into. Get the mode for the new pseudo
6395 from the mode of the original destination pseudo. */
6396 if (expr
->reaching_reg
== NULL
)
6398 = gen_reg_rtx (GET_MODE (SET_DEST (set
)));
6400 gcse_emit_move_after (expr
->reaching_reg
, SET_DEST (set
), insn
);
6402 occr
->deleted_p
= 1;
6403 if (!insn_inserted_p
)
6405 insert_insn_end_bb (index_map
[i
], bb
, 0);
6406 insn_inserted_p
= 1;
6418 /* Top level routine to perform one code hoisting (aka unification) pass
6420 Return nonzero if a change was made. */
6423 one_code_hoisting_pass ()
6427 alloc_hash_table (max_cuid
, &expr_hash_table
, 0);
6428 compute_hash_table (&expr_hash_table
);
6430 dump_hash_table (gcse_file
, "Code Hosting Expressions", &expr_hash_table
);
6432 if (expr_hash_table
.n_elems
> 0)
6434 alloc_code_hoist_mem (last_basic_block
, expr_hash_table
.n_elems
);
6435 compute_code_hoist_data ();
6437 free_code_hoist_mem ();
6440 free_hash_table (&expr_hash_table
);
6445 /* Here we provide the things required to do store motion towards
6446 the exit. In order for this to be effective, gcse also needed to
6447 be taught how to move a load when it is kill only by a store to itself.
6452 void foo(float scale)
6454 for (i=0; i<10; i++)
6458 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
6459 the load out since its live around the loop, and stored at the bottom
6462 The 'Load Motion' referred to and implemented in this file is
6463 an enhancement to gcse which when using edge based lcm, recognizes
6464 this situation and allows gcse to move the load out of the loop.
6466 Once gcse has hoisted the load, store motion can then push this
6467 load towards the exit, and we end up with no loads or stores of 'i'
6470 /* This will search the ldst list for a matching expression. If it
6471 doesn't find one, we create one and initialize it. */
6473 static struct ls_expr
*
6477 struct ls_expr
* ptr
;
6479 for (ptr
= first_ls_expr(); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
6480 if (expr_equiv_p (ptr
->pattern
, x
))
6485 ptr
= (struct ls_expr
*) xmalloc (sizeof (struct ls_expr
));
6487 ptr
->next
= pre_ldst_mems
;
6490 ptr
->loads
= NULL_RTX
;
6491 ptr
->stores
= NULL_RTX
;
6492 ptr
->reaching_reg
= NULL_RTX
;
6495 ptr
->hash_index
= 0;
6496 pre_ldst_mems
= ptr
;
6502 /* Free up an individual ldst entry. */
6505 free_ldst_entry (ptr
)
6506 struct ls_expr
* ptr
;
6508 free_INSN_LIST_list (& ptr
->loads
);
6509 free_INSN_LIST_list (& ptr
->stores
);
6514 /* Free up all memory associated with the ldst list. */
6519 while (pre_ldst_mems
)
6521 struct ls_expr
* tmp
= pre_ldst_mems
;
6523 pre_ldst_mems
= pre_ldst_mems
->next
;
6525 free_ldst_entry (tmp
);
6528 pre_ldst_mems
= NULL
;
6531 /* Dump debugging info about the ldst list. */
6534 print_ldst_list (file
)
6537 struct ls_expr
* ptr
;
6539 fprintf (file
, "LDST list: \n");
6541 for (ptr
= first_ls_expr(); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
6543 fprintf (file
, " Pattern (%3d): ", ptr
->index
);
6545 print_rtl (file
, ptr
->pattern
);
6547 fprintf (file
, "\n Loads : ");
6550 print_rtl (file
, ptr
->loads
);
6552 fprintf (file
, "(nil)");
6554 fprintf (file
, "\n Stores : ");
6557 print_rtl (file
, ptr
->stores
);
6559 fprintf (file
, "(nil)");
6561 fprintf (file
, "\n\n");
6564 fprintf (file
, "\n");
6567 /* Returns 1 if X is in the list of ldst only expressions. */
6569 static struct ls_expr
*
6570 find_rtx_in_ldst (x
)
6573 struct ls_expr
* ptr
;
6575 for (ptr
= pre_ldst_mems
; ptr
!= NULL
; ptr
= ptr
->next
)
6576 if (expr_equiv_p (ptr
->pattern
, x
) && ! ptr
->invalid
)
6582 /* Assign each element of the list of mems a monotonically increasing value. */
6587 struct ls_expr
* ptr
;
6590 for (ptr
= pre_ldst_mems
; ptr
!= NULL
; ptr
= ptr
->next
)
6596 /* Return first item in the list. */
6598 static inline struct ls_expr
*
6601 return pre_ldst_mems
;
6604 /* Return the next item in ther list after the specified one. */
6606 static inline struct ls_expr
*
6608 struct ls_expr
* ptr
;
6613 /* Load Motion for loads which only kill themselves. */
6615 /* Return true if x is a simple MEM operation, with no registers or
6616 side effects. These are the types of loads we consider for the
6617 ld_motion list, otherwise we let the usual aliasing take care of it. */
6623 if (GET_CODE (x
) != MEM
)
6626 if (MEM_VOLATILE_P (x
))
6629 if (GET_MODE (x
) == BLKmode
)
6632 if (!rtx_varies_p (XEXP (x
, 0), 0))
6638 /* Make sure there isn't a buried reference in this pattern anywhere.
6639 If there is, invalidate the entry for it since we're not capable
6640 of fixing it up just yet.. We have to be sure we know about ALL
6641 loads since the aliasing code will allow all entries in the
6642 ld_motion list to not-alias itself. If we miss a load, we will get
6643 the wrong value since gcse might common it and we won't know to
6647 invalidate_any_buried_refs (x
)
6652 struct ls_expr
* ptr
;
6654 /* Invalidate it in the list. */
6655 if (GET_CODE (x
) == MEM
&& simple_mem (x
))
6657 ptr
= ldst_entry (x
);
6661 /* Recursively process the insn. */
6662 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
6664 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
6667 invalidate_any_buried_refs (XEXP (x
, i
));
6668 else if (fmt
[i
] == 'E')
6669 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
6670 invalidate_any_buried_refs (XVECEXP (x
, i
, j
));
6674 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
6675 being defined as MEM loads and stores to symbols, with no
6676 side effects and no registers in the expression. If there are any
6677 uses/defs which don't match this criteria, it is invalidated and
6678 trimmed out later. */
6681 compute_ld_motion_mems ()
6683 struct ls_expr
* ptr
;
6687 pre_ldst_mems
= NULL
;
6691 for (insn
= bb
->head
;
6692 insn
&& insn
!= NEXT_INSN (bb
->end
);
6693 insn
= NEXT_INSN (insn
))
6695 if (GET_RTX_CLASS (GET_CODE (insn
)) == 'i')
6697 if (GET_CODE (PATTERN (insn
)) == SET
)
6699 rtx src
= SET_SRC (PATTERN (insn
));
6700 rtx dest
= SET_DEST (PATTERN (insn
));
6702 /* Check for a simple LOAD... */
6703 if (GET_CODE (src
) == MEM
&& simple_mem (src
))
6705 ptr
= ldst_entry (src
);
6706 if (GET_CODE (dest
) == REG
)
6707 ptr
->loads
= alloc_INSN_LIST (insn
, ptr
->loads
);
6713 /* Make sure there isn't a buried load somewhere. */
6714 invalidate_any_buried_refs (src
);
6717 /* Check for stores. Don't worry about aliased ones, they
6718 will block any movement we might do later. We only care
6719 about this exact pattern since those are the only
6720 circumstance that we will ignore the aliasing info. */
6721 if (GET_CODE (dest
) == MEM
&& simple_mem (dest
))
6723 ptr
= ldst_entry (dest
);
6725 if (GET_CODE (src
) != MEM
6726 && GET_CODE (src
) != ASM_OPERANDS
)
6727 ptr
->stores
= alloc_INSN_LIST (insn
, ptr
->stores
);
6733 invalidate_any_buried_refs (PATTERN (insn
));
6739 /* Remove any references that have been either invalidated or are not in the
6740 expression list for pre gcse. */
6743 trim_ld_motion_mems ()
6745 struct ls_expr
* last
= NULL
;
6746 struct ls_expr
* ptr
= first_ls_expr ();
6750 int del
= ptr
->invalid
;
6751 struct expr
* expr
= NULL
;
6753 /* Delete if entry has been made invalid. */
6759 /* Delete if we cannot find this mem in the expression list. */
6760 for (i
= 0; i
< expr_hash_table
.size
&& del
; i
++)
6762 for (expr
= expr_hash_table
.table
[i
];
6764 expr
= expr
->next_same_hash
)
6765 if (expr_equiv_p (expr
->expr
, ptr
->pattern
))
6777 last
->next
= ptr
->next
;
6778 free_ldst_entry (ptr
);
6783 pre_ldst_mems
= pre_ldst_mems
->next
;
6784 free_ldst_entry (ptr
);
6785 ptr
= pre_ldst_mems
;
6790 /* Set the expression field if we are keeping it. */
6797 /* Show the world what we've found. */
6798 if (gcse_file
&& pre_ldst_mems
!= NULL
)
6799 print_ldst_list (gcse_file
);
6802 /* This routine will take an expression which we are replacing with
6803 a reaching register, and update any stores that are needed if
6804 that expression is in the ld_motion list. Stores are updated by
6805 copying their SRC to the reaching register, and then storeing
6806 the reaching register into the store location. These keeps the
6807 correct value in the reaching register for the loads. */
6810 update_ld_motion_stores (expr
)
6813 struct ls_expr
* mem_ptr
;
6815 if ((mem_ptr
= find_rtx_in_ldst (expr
->expr
)))
6817 /* We can try to find just the REACHED stores, but is shouldn't
6818 matter to set the reaching reg everywhere... some might be
6819 dead and should be eliminated later. */
6821 /* We replace SET mem = expr with
6823 SET mem = reg , where reg is the
6824 reaching reg used in the load. */
6825 rtx list
= mem_ptr
->stores
;
6827 for ( ; list
!= NULL_RTX
; list
= XEXP (list
, 1))
6829 rtx insn
= XEXP (list
, 0);
6830 rtx pat
= PATTERN (insn
);
6831 rtx src
= SET_SRC (pat
);
6832 rtx reg
= expr
->reaching_reg
;
6835 /* If we've already copied it, continue. */
6836 if (expr
->reaching_reg
== src
)
6841 fprintf (gcse_file
, "PRE: store updated with reaching reg ");
6842 print_rtl (gcse_file
, expr
->reaching_reg
);
6843 fprintf (gcse_file
, ":\n ");
6844 print_inline_rtx (gcse_file
, insn
, 8);
6845 fprintf (gcse_file
, "\n");
6848 copy
= gen_move_insn ( reg
, SET_SRC (pat
));
6849 new = emit_insn_before (copy
, insn
);
6850 record_one_set (REGNO (reg
), new);
6851 SET_SRC (pat
) = reg
;
6853 /* un-recognize this pattern since it's probably different now. */
6854 INSN_CODE (insn
) = -1;
6855 gcse_create_count
++;
6860 /* Store motion code. */
6862 /* This is used to communicate the target bitvector we want to use in the
6863 reg_set_info routine when called via the note_stores mechanism. */
6864 static sbitmap
* regvec
;
6866 /* Used in computing the reverse edge graph bit vectors. */
6867 static sbitmap
* st_antloc
;
6869 /* Global holding the number of store expressions we are dealing with. */
6870 static int num_stores
;
6872 /* Checks to set if we need to mark a register set. Called from note_stores. */
6875 reg_set_info (dest
, setter
, data
)
6876 rtx dest
, setter ATTRIBUTE_UNUSED
;
6877 void * data ATTRIBUTE_UNUSED
;
6879 if (GET_CODE (dest
) == SUBREG
)
6880 dest
= SUBREG_REG (dest
);
6882 if (GET_CODE (dest
) == REG
)
6883 SET_BIT (*regvec
, REGNO (dest
));
6886 /* Return nonzero if the register operands of expression X are killed
6887 anywhere in basic block BB. */
6890 store_ops_ok (x
, bb
)
6898 /* Repeat is used to turn tail-recursion into iteration. */
6904 code
= GET_CODE (x
);
6908 /* If a reg has changed after us in this
6909 block, the operand has been killed. */
6910 return TEST_BIT (reg_set_in_block
[bb
->index
], REGNO (x
));
6938 i
= GET_RTX_LENGTH (code
) - 1;
6939 fmt
= GET_RTX_FORMAT (code
);
6945 rtx tem
= XEXP (x
, i
);
6947 /* If we are about to do the last recursive call
6948 needed at this level, change it into iteration.
6949 This function is called enough to be worth it. */
6956 if (! store_ops_ok (tem
, bb
))
6959 else if (fmt
[i
] == 'E')
6963 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
6965 if (! store_ops_ok (XVECEXP (x
, i
, j
), bb
))
6974 /* Determine whether insn is MEM store pattern that we will consider moving. */
6977 find_moveable_store (insn
)
6980 struct ls_expr
* ptr
;
6981 rtx dest
= PATTERN (insn
);
6983 if (GET_CODE (dest
) != SET
6984 || GET_CODE (SET_SRC (dest
)) == ASM_OPERANDS
)
6987 dest
= SET_DEST (dest
);
6989 if (GET_CODE (dest
) != MEM
|| MEM_VOLATILE_P (dest
)
6990 || GET_MODE (dest
) == BLKmode
)
6993 if (GET_CODE (XEXP (dest
, 0)) != SYMBOL_REF
)
6996 if (rtx_varies_p (XEXP (dest
, 0), 0))
6999 ptr
= ldst_entry (dest
);
7000 ptr
->stores
= alloc_INSN_LIST (insn
, ptr
->stores
);
7003 /* Perform store motion. Much like gcse, except we move expressions the
7004 other way by looking at the flowgraph in reverse. */
7007 compute_store_table ()
7014 max_gcse_regno
= max_reg_num ();
7016 reg_set_in_block
= (sbitmap
*) sbitmap_vector_alloc (last_basic_block
,
7018 sbitmap_vector_zero (reg_set_in_block
, last_basic_block
);
7021 /* Find all the stores we care about. */
7024 regvec
= & (reg_set_in_block
[bb
->index
]);
7025 for (insn
= bb
->end
;
7026 insn
&& insn
!= PREV_INSN (bb
->end
);
7027 insn
= PREV_INSN (insn
))
7029 /* Ignore anything that is not a normal insn. */
7030 if (! INSN_P (insn
))
7033 if (GET_CODE (insn
) == CALL_INSN
)
7035 bool clobbers_all
= false;
7036 #ifdef NON_SAVING_SETJMP
7037 if (NON_SAVING_SETJMP
7038 && find_reg_note (insn
, REG_SETJMP
, NULL_RTX
))
7039 clobbers_all
= true;
7042 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
7044 || TEST_HARD_REG_BIT (regs_invalidated_by_call
, regno
))
7045 SET_BIT (reg_set_in_block
[bb
->index
], regno
);
7048 pat
= PATTERN (insn
);
7049 note_stores (pat
, reg_set_info
, NULL
);
7051 /* Now that we've marked regs, look for stores. */
7052 if (GET_CODE (pat
) == SET
)
7053 find_moveable_store (insn
);
7057 ret
= enumerate_ldsts ();
7061 fprintf (gcse_file
, "Store Motion Expressions.\n");
7062 print_ldst_list (gcse_file
);
7068 /* Check to see if the load X is aliased with STORE_PATTERN. */
7071 load_kills_store (x
, store_pattern
)
7072 rtx x
, store_pattern
;
7074 if (true_dependence (x
, GET_MODE (x
), store_pattern
, rtx_addr_varies_p
))
7079 /* Go through the entire insn X, looking for any loads which might alias
7080 STORE_PATTERN. Return 1 if found. */
7083 find_loads (x
, store_pattern
)
7084 rtx x
, store_pattern
;
7093 if (GET_CODE (x
) == SET
)
7096 if (GET_CODE (x
) == MEM
)
7098 if (load_kills_store (x
, store_pattern
))
7102 /* Recursively process the insn. */
7103 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
7105 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0 && !ret
; i
--)
7108 ret
|= find_loads (XEXP (x
, i
), store_pattern
);
7109 else if (fmt
[i
] == 'E')
7110 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
7111 ret
|= find_loads (XVECEXP (x
, i
, j
), store_pattern
);
7116 /* Check if INSN kills the store pattern X (is aliased with it).
7117 Return 1 if it it does. */
7120 store_killed_in_insn (x
, insn
)
7123 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
7126 if (GET_CODE (insn
) == CALL_INSN
)
7128 /* A normal or pure call might read from pattern,
7129 but a const call will not. */
7130 return ! CONST_OR_PURE_CALL_P (insn
) || pure_call_p (insn
);
7133 if (GET_CODE (PATTERN (insn
)) == SET
)
7135 rtx pat
= PATTERN (insn
);
7136 /* Check for memory stores to aliased objects. */
7137 if (GET_CODE (SET_DEST (pat
)) == MEM
&& !expr_equiv_p (SET_DEST (pat
), x
))
7138 /* pretend its a load and check for aliasing. */
7139 if (find_loads (SET_DEST (pat
), x
))
7141 return find_loads (SET_SRC (pat
), x
);
7144 return find_loads (PATTERN (insn
), x
);
7147 /* Returns 1 if the expression X is loaded or clobbered on or after INSN
7148 within basic block BB. */
7151 store_killed_after (x
, insn
, bb
)
7160 /* Check if the register operands of the store are OK in this block.
7161 Note that if registers are changed ANYWHERE in the block, we'll
7162 decide we can't move it, regardless of whether it changed above
7163 or below the store. This could be improved by checking the register
7164 operands while looking for aliasing in each insn. */
7165 if (!store_ops_ok (XEXP (x
, 0), bb
))
7168 for ( ; insn
&& insn
!= NEXT_INSN (last
); insn
= NEXT_INSN (insn
))
7169 if (store_killed_in_insn (x
, insn
))
7175 /* Returns 1 if the expression X is loaded or clobbered on or before INSN
7176 within basic block BB. */
7178 store_killed_before (x
, insn
, bb
)
7182 rtx first
= bb
->head
;
7185 return store_killed_in_insn (x
, insn
);
7187 /* Check if the register operands of the store are OK in this block.
7188 Note that if registers are changed ANYWHERE in the block, we'll
7189 decide we can't move it, regardless of whether it changed above
7190 or below the store. This could be improved by checking the register
7191 operands while looking for aliasing in each insn. */
7192 if (!store_ops_ok (XEXP (x
, 0), bb
))
7195 for ( ; insn
&& insn
!= PREV_INSN (first
); insn
= PREV_INSN (insn
))
7196 if (store_killed_in_insn (x
, insn
))
7202 #define ANTIC_STORE_LIST(x) ((x)->loads)
7203 #define AVAIL_STORE_LIST(x) ((x)->stores)
7205 /* Given the table of available store insns at the end of blocks,
7206 determine which ones are not killed by aliasing, and generate
7207 the appropriate vectors for gen and killed. */
7209 build_store_vectors ()
7213 struct ls_expr
* ptr
;
7215 /* Build the gen_vector. This is any store in the table which is not killed
7216 by aliasing later in its block. */
7217 ae_gen
= (sbitmap
*) sbitmap_vector_alloc (last_basic_block
, num_stores
);
7218 sbitmap_vector_zero (ae_gen
, last_basic_block
);
7220 st_antloc
= (sbitmap
*) sbitmap_vector_alloc (last_basic_block
, num_stores
);
7221 sbitmap_vector_zero (st_antloc
, last_basic_block
);
7223 for (ptr
= first_ls_expr (); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
7225 /* Put all the stores into either the antic list, or the avail list,
7227 rtx store_list
= ptr
->stores
;
7228 ptr
->stores
= NULL_RTX
;
7230 for (st
= store_list
; st
!= NULL
; st
= XEXP (st
, 1))
7232 insn
= XEXP (st
, 0);
7233 bb
= BLOCK_FOR_INSN (insn
);
7235 if (!store_killed_after (ptr
->pattern
, insn
, bb
))
7237 /* If we've already seen an available expression in this block,
7238 we can delete the one we saw already (It occurs earlier in
7239 the block), and replace it with this one). We'll copy the
7240 old SRC expression to an unused register in case there
7241 are any side effects. */
7242 if (TEST_BIT (ae_gen
[bb
->index
], ptr
->index
))
7244 /* Find previous store. */
7246 for (st
= AVAIL_STORE_LIST (ptr
); st
; st
= XEXP (st
, 1))
7247 if (BLOCK_FOR_INSN (XEXP (st
, 0)) == bb
)
7251 rtx r
= gen_reg_rtx (GET_MODE (ptr
->pattern
));
7253 fprintf (gcse_file
, "Removing redundant store:\n");
7254 replace_store_insn (r
, XEXP (st
, 0), bb
);
7255 XEXP (st
, 0) = insn
;
7259 SET_BIT (ae_gen
[bb
->index
], ptr
->index
);
7260 AVAIL_STORE_LIST (ptr
) = alloc_INSN_LIST (insn
,
7261 AVAIL_STORE_LIST (ptr
));
7264 if (!store_killed_before (ptr
->pattern
, insn
, bb
))
7266 SET_BIT (st_antloc
[BLOCK_NUM (insn
)], ptr
->index
);
7267 ANTIC_STORE_LIST (ptr
) = alloc_INSN_LIST (insn
,
7268 ANTIC_STORE_LIST (ptr
));
7272 /* Free the original list of store insns. */
7273 free_INSN_LIST_list (&store_list
);
7276 ae_kill
= (sbitmap
*) sbitmap_vector_alloc (last_basic_block
, num_stores
);
7277 sbitmap_vector_zero (ae_kill
, last_basic_block
);
7279 transp
= (sbitmap
*) sbitmap_vector_alloc (last_basic_block
, num_stores
);
7280 sbitmap_vector_zero (transp
, last_basic_block
);
7282 for (ptr
= first_ls_expr (); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
7285 if (store_killed_after (ptr
->pattern
, b
->head
, b
))
7287 /* The anticipatable expression is not killed if it's gen'd. */
7289 We leave this check out for now. If we have a code sequence
7290 in a block which looks like:
7294 We should flag this as having an ANTIC expression, NOT
7295 transparent, NOT killed, and AVAIL.
7296 Unfortunately, since we haven't re-written all loads to
7297 use the reaching reg, we'll end up doing an incorrect
7298 Load in the middle here if we push the store down. It happens in
7299 gcc.c-torture/execute/960311-1.c with -O3
7300 If we always kill it in this case, we'll sometimes do
7301 unnecessary work, but it shouldn't actually hurt anything.
7302 if (!TEST_BIT (ae_gen[b], ptr->index)). */
7303 SET_BIT (ae_kill
[b
->index
], ptr
->index
);
7306 SET_BIT (transp
[b
->index
], ptr
->index
);
7309 /* Any block with no exits calls some non-returning function, so
7310 we better mark the store killed here, or we might not store to
7311 it at all. If we knew it was abort, we wouldn't have to store,
7312 but we don't know that for sure. */
7315 fprintf (gcse_file
, "ST_avail and ST_antic (shown under loads..)\n");
7316 print_ldst_list (gcse_file
);
7317 dump_sbitmap_vector (gcse_file
, "st_antloc", "", st_antloc
, last_basic_block
);
7318 dump_sbitmap_vector (gcse_file
, "st_kill", "", ae_kill
, last_basic_block
);
7319 dump_sbitmap_vector (gcse_file
, "Transpt", "", transp
, last_basic_block
);
7320 dump_sbitmap_vector (gcse_file
, "st_avloc", "", ae_gen
, last_basic_block
);
7324 /* Insert an instruction at the beginning of a basic block, and update
7325 the BLOCK_HEAD if needed. */
7328 insert_insn_start_bb (insn
, bb
)
7332 /* Insert at start of successor block. */
7333 rtx prev
= PREV_INSN (bb
->head
);
7334 rtx before
= bb
->head
;
7337 if (GET_CODE (before
) != CODE_LABEL
7338 && (GET_CODE (before
) != NOTE
7339 || NOTE_LINE_NUMBER (before
) != NOTE_INSN_BASIC_BLOCK
))
7342 if (prev
== bb
->end
)
7344 before
= NEXT_INSN (before
);
7347 insn
= emit_insn_after (insn
, prev
);
7351 fprintf (gcse_file
, "STORE_MOTION insert store at start of BB %d:\n",
7353 print_inline_rtx (gcse_file
, insn
, 6);
7354 fprintf (gcse_file
, "\n");
7358 /* This routine will insert a store on an edge. EXPR is the ldst entry for
7359 the memory reference, and E is the edge to insert it on. Returns nonzero
7360 if an edge insertion was performed. */
7363 insert_store (expr
, e
)
7364 struct ls_expr
* expr
;
7371 /* We did all the deleted before this insert, so if we didn't delete a
7372 store, then we haven't set the reaching reg yet either. */
7373 if (expr
->reaching_reg
== NULL_RTX
)
7376 reg
= expr
->reaching_reg
;
7377 insn
= gen_move_insn (expr
->pattern
, reg
);
7379 /* If we are inserting this expression on ALL predecessor edges of a BB,
7380 insert it at the start of the BB, and reset the insert bits on the other
7381 edges so we don't try to insert it on the other edges. */
7383 for (tmp
= e
->dest
->pred
; tmp
; tmp
= tmp
->pred_next
)
7385 int index
= EDGE_INDEX (edge_list
, tmp
->src
, tmp
->dest
);
7386 if (index
== EDGE_INDEX_NO_EDGE
)
7388 if (! TEST_BIT (pre_insert_map
[index
], expr
->index
))
7392 /* If tmp is NULL, we found an insertion on every edge, blank the
7393 insertion vector for these edges, and insert at the start of the BB. */
7394 if (!tmp
&& bb
!= EXIT_BLOCK_PTR
)
7396 for (tmp
= e
->dest
->pred
; tmp
; tmp
= tmp
->pred_next
)
7398 int index
= EDGE_INDEX (edge_list
, tmp
->src
, tmp
->dest
);
7399 RESET_BIT (pre_insert_map
[index
], expr
->index
);
7401 insert_insn_start_bb (insn
, bb
);
7405 /* We can't insert on this edge, so we'll insert at the head of the
7406 successors block. See Morgan, sec 10.5. */
7407 if ((e
->flags
& EDGE_ABNORMAL
) == EDGE_ABNORMAL
)
7409 insert_insn_start_bb (insn
, bb
);
7413 insert_insn_on_edge (insn
, e
);
7417 fprintf (gcse_file
, "STORE_MOTION insert insn on edge (%d, %d):\n",
7418 e
->src
->index
, e
->dest
->index
);
7419 print_inline_rtx (gcse_file
, insn
, 6);
7420 fprintf (gcse_file
, "\n");
7426 /* This routine will replace a store with a SET to a specified register. */
7429 replace_store_insn (reg
, del
, bb
)
7435 insn
= gen_move_insn (reg
, SET_SRC (PATTERN (del
)));
7436 insn
= emit_insn_after (insn
, del
);
7441 "STORE_MOTION delete insn in BB %d:\n ", bb
->index
);
7442 print_inline_rtx (gcse_file
, del
, 6);
7443 fprintf (gcse_file
, "\nSTORE MOTION replaced with insn:\n ");
7444 print_inline_rtx (gcse_file
, insn
, 6);
7445 fprintf (gcse_file
, "\n");
7452 /* Delete a store, but copy the value that would have been stored into
7453 the reaching_reg for later storing. */
7456 delete_store (expr
, bb
)
7457 struct ls_expr
* expr
;
7462 if (expr
->reaching_reg
== NULL_RTX
)
7463 expr
->reaching_reg
= gen_reg_rtx (GET_MODE (expr
->pattern
));
7466 /* If there is more than 1 store, the earlier ones will be dead,
7467 but it doesn't hurt to replace them here. */
7468 reg
= expr
->reaching_reg
;
7470 for (i
= AVAIL_STORE_LIST (expr
); i
; i
= XEXP (i
, 1))
7473 if (BLOCK_FOR_INSN (del
) == bb
)
7475 /* We know there is only one since we deleted redundant
7476 ones during the available computation. */
7477 replace_store_insn (reg
, del
, bb
);
7483 /* Free memory used by store motion. */
7486 free_store_memory ()
7491 sbitmap_vector_free (ae_gen
);
7493 sbitmap_vector_free (ae_kill
);
7495 sbitmap_vector_free (transp
);
7497 sbitmap_vector_free (st_antloc
);
7499 sbitmap_vector_free (pre_insert_map
);
7501 sbitmap_vector_free (pre_delete_map
);
7502 if (reg_set_in_block
)
7503 sbitmap_vector_free (reg_set_in_block
);
7505 ae_gen
= ae_kill
= transp
= st_antloc
= NULL
;
7506 pre_insert_map
= pre_delete_map
= reg_set_in_block
= NULL
;
7509 /* Perform store motion. Much like gcse, except we move expressions the
7510 other way by looking at the flowgraph in reverse. */
7517 struct ls_expr
* ptr
;
7518 int update_flow
= 0;
7522 fprintf (gcse_file
, "before store motion\n");
7523 print_rtl (gcse_file
, get_insns ());
7527 init_alias_analysis ();
7529 /* Find all the stores that are live to the end of their block. */
7530 num_stores
= compute_store_table ();
7531 if (num_stores
== 0)
7533 sbitmap_vector_free (reg_set_in_block
);
7534 end_alias_analysis ();
7538 /* Now compute whats actually available to move. */
7539 add_noreturn_fake_exit_edges ();
7540 build_store_vectors ();
7542 edge_list
= pre_edge_rev_lcm (gcse_file
, num_stores
, transp
, ae_gen
,
7543 st_antloc
, ae_kill
, &pre_insert_map
,
7546 /* Now we want to insert the new stores which are going to be needed. */
7547 for (ptr
= first_ls_expr (); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
7550 if (TEST_BIT (pre_delete_map
[bb
->index
], ptr
->index
))
7551 delete_store (ptr
, bb
);
7553 for (x
= 0; x
< NUM_EDGES (edge_list
); x
++)
7554 if (TEST_BIT (pre_insert_map
[x
], ptr
->index
))
7555 update_flow
|= insert_store (ptr
, INDEX_EDGE (edge_list
, x
));
7559 commit_edge_insertions ();
7561 free_store_memory ();
7562 free_edge_list (edge_list
);
7563 remove_fake_edges ();
7564 end_alias_analysis ();
7568 /* Entry point for jump bypassing optimization pass. */
7576 /* We do not construct an accurate cfg in functions which call
7577 setjmp, so just punt to be safe. */
7578 if (current_function_calls_setjmp
)
7581 /* For calling dump_foo fns from gdb. */
7582 debug_stderr
= stderr
;
7585 /* Identify the basic block information for this function, including
7586 successors and predecessors. */
7587 max_gcse_regno
= max_reg_num ();
7590 dump_flow_info (file
);
7592 /* Return if there's nothing to do. */
7593 if (n_basic_blocks
<= 1)
7596 /* Trying to perform global optimizations on flow graphs which have
7597 a high connectivity will take a long time and is unlikely to be
7598 particularly useful.
7600 In normal circumstances a cfg should have about twice as many edges
7601 as blocks. But we do not want to punish small functions which have
7602 a couple switch statements. So we require a relatively large number
7603 of basic blocks and the ratio of edges to blocks to be high. */
7604 if (n_basic_blocks
> 1000 && n_edges
/ n_basic_blocks
>= 20)
7606 if (warn_disabled_optimization
)
7607 warning ("BYPASS disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
7608 n_basic_blocks
, n_edges
/ n_basic_blocks
);
7612 /* If allocating memory for the cprop bitmap would take up too much
7613 storage it's better just to disable the optimization. */
7615 * SBITMAP_SET_SIZE (max_gcse_regno
)
7616 * sizeof (SBITMAP_ELT_TYPE
)) > MAX_GCSE_MEMORY
)
7618 if (warn_disabled_optimization
)
7619 warning ("GCSE disabled: %d basic blocks and %d registers",
7620 n_basic_blocks
, max_gcse_regno
);
7625 /* See what modes support reg/reg copy operations. */
7626 if (! can_copy_init_p
)
7628 compute_can_copy ();
7629 can_copy_init_p
= 1;
7632 gcc_obstack_init (&gcse_obstack
);
7635 /* We need alias. */
7636 init_alias_analysis ();
7638 /* Record where pseudo-registers are set. This data is kept accurate
7639 during each pass. ??? We could also record hard-reg information here
7640 [since it's unchanging], however it is currently done during hash table
7643 It may be tempting to compute MEM set information here too, but MEM sets
7644 will be subject to code motion one day and thus we need to compute
7645 information about memory sets when we build the hash tables. */
7647 alloc_reg_set_mem (max_gcse_regno
);
7648 compute_sets (get_insns ());
7650 max_gcse_regno
= max_reg_num ();
7651 alloc_gcse_mem (get_insns ());
7652 changed
= one_cprop_pass (1, 1, 1);
7657 fprintf (file
, "BYPASS of %s: %d basic blocks, ",
7658 current_function_name
, n_basic_blocks
);
7659 fprintf (file
, "%d bytes\n\n", bytes_used
);
7662 obstack_free (&gcse_obstack
, NULL
);
7663 free_reg_set_mem ();
7665 /* We are finished with alias. */
7666 end_alias_analysis ();
7667 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
7672 #include "gt-gcse.h"