1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - a store to the same address as a load does not kill the load if the
28 source of the store is also the destination of the load. Handling this
29 allows more load motion, particularly out of loops.
30 - ability to realloc sbitmap vectors would allow one initial computation
31 of reg_set_in_block with only subsequent additions, rather than
32 recomputing it for each pass
36 /* References searched while implementing this.
38 Compilers Principles, Techniques and Tools
42 Global Optimization by Suppression of Partial Redundancies
44 Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
46 A Portable Machine-Independent Global Optimizer - Design and Measurements
48 Stanford Ph.D. thesis, Dec. 1983
50 A Fast Algorithm for Code Movement Optimization
52 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
54 A Solution to a Problem with Morel and Renvoise's
55 Global Optimization by Suppression of Partial Redundancies
56 K-H Drechsler, M.P. Stadel
57 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
59 Practical Adaptation of the Global Optimization
60 Algorithm of Morel and Renvoise
62 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
64 Efficiently Computing Static Single Assignment Form and the Control
66 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
67 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
70 J. Knoop, O. Ruthing, B. Steffen
71 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
73 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
74 Time for Reducible Flow Control
76 ACM Letters on Programming Languages and Systems,
77 Vol. 2, Num. 1-4, Mar-Dec 1993
79 An Efficient Representation for Sparse Sets
80 Preston Briggs, Linda Torczon
81 ACM Letters on Programming Languages and Systems,
82 Vol. 2, Num. 1-4, Mar-Dec 1993
84 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
85 K-H Drechsler, M.P. Stadel
86 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
88 Partial Dead Code Elimination
89 J. Knoop, O. Ruthing, B. Steffen
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Effective Partial Redundancy Elimination
93 P. Briggs, K.D. Cooper
94 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
96 The Program Structure Tree: Computing Control Regions in Linear Time
97 R. Johnson, D. Pearson, K. Pingali
98 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
100 Optimal Code Motion: Theory and Practice
101 J. Knoop, O. Ruthing, B. Steffen
102 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
104 The power of assignment motion
105 J. Knoop, O. Ruthing, B. Steffen
106 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
108 Global code motion / global value numbering
110 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
112 Value Driven Redundancy Elimination
114 Rice University Ph.D. thesis, Apr. 1996
118 Massively Scalar Compiler Project, Rice University, Sep. 1996
120 High Performance Compilers for Parallel Computing
124 Advanced Compiler Design and Implementation
126 Morgan Kaufmann, 1997
128 Building an Optimizing Compiler
132 People wishing to speed up the code here should read:
133 Elimination Algorithms for Data Flow Analysis
134 B.G. Ryder, M.C. Paull
135 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
137 How to Analyze Large Programs Efficiently and Informatively
138 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
139 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
141 People wishing to do something different can find various possibilities
142 in the above papers and elsewhere.
147 #include "coretypes.h"
155 #include "hard-reg-set.h"
158 #include "insn-config.h"
160 #include "basic-block.h"
162 #include "function.h"
171 #include "tree-pass.h"
174 /* Propagate flow information through back edges and thus enable PRE's
175 moving loop invariant calculations out of loops.
177 Originally this tended to create worse overall code, but several
178 improvements during the development of PRE seem to have made following
179 back edges generally a win.
181 Note much of the loop invariant code motion done here would normally
182 be done by loop.c, which has more heuristics for when to move invariants
183 out of loops. At some point we might need to move some of those
184 heuristics into gcse.c. */
186 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
187 are a superset of those done by GCSE.
189 We perform the following steps:
191 1) Compute basic block information.
193 2) Compute table of places where registers are set.
195 3) Perform copy/constant propagation.
197 4) Perform global cse using lazy code motion if not optimizing
198 for size, or code hoisting if we are.
200 5) Perform another pass of copy/constant propagation.
202 Two passes of copy/constant propagation are done because the first one
203 enables more GCSE and the second one helps to clean up the copies that
204 GCSE creates. This is needed more for PRE than for Classic because Classic
205 GCSE will try to use an existing register containing the common
206 subexpression rather than create a new one. This is harder to do for PRE
207 because of the code motion (which Classic GCSE doesn't do).
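As a purely illustrative, made-up fragment (not taken from any testcase),
the interplay between the passes looks roughly like this:

    r1 = 4;                    r1 = 4;                    r1 = 4;
    r2 = r1 + r3;   -cprop->   r2 = 4 + r3;   -GCSE->     t = 4 + r3; r2 = t;
    ...                        ...                        ...
    r4 = r1 + r3;              r4 = 4 + r3;               r4 = t;

The first propagation pass exposes the common subexpression `4 + r3' to
GCSE, and the second pass cleans up the copies of `t' that GCSE creates.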
209 Expressions we are interested in GCSE-ing are of the form
210 (set (pseudo-reg) (expression)).
211 Function want_to_gcse_p says what these are.
213 PRE handles moving invariant expressions out of loops (by treating them as
214 partially redundant).
216 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
217 assignment) based GVN (global value numbering). L. T. Simpson's paper
218 (Rice University) on value numbering is a useful reference for this.
220 **********************
222 We used to support multiple passes but there are diminishing returns in
223 doing so. The first pass usually makes 90% of the changes that are doable.
224 A second pass can make a few more changes made possible by the first pass.
225 Experiments show any further passes don't make enough changes to justify the expense.
228 A study of spec92 using an unlimited number of passes:
229 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
230 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
231 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
233 It was found doing copy propagation between each pass enables further substitutions.
236 PRE is quite expensive in complicated functions because the DFA can take
237 a while to converge. Hence we only perform one pass. The parameter
238 max-gcse-passes can be modified if one wants to experiment.
240 **********************
242 The steps for PRE are:
244 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
246 2) Perform the data flow analysis for PRE.
248 3) Delete the redundant instructions
250 4) Insert the required copies [if any] that make the partially
251 redundant instructions fully redundant.
253 5) For other reaching expressions, insert an instruction to copy the value
254 to a newly created pseudo that will reach the redundant instruction.
256 The deletion is done first so that when we do insertions we
257 know which pseudo reg to use.
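A small, made-up example (not from the sources) of how steps 3-5 fit
together.  Below, `a + b' is partially redundant because it is computed on
only one path into the join point:

    if (cond)                          if (cond)
      x = a + b;                         x = a + b; t = x;
    else                       ===>    else
      ...                                t = a + b;    <- copy inserted, steps 4/5
    y = a + b;                         y = t;          <- redundant insn deleted, step 3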
259 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
260 argue it is not. The number of iterations for the algorithm to converge
261 is typically 2-4 so I don't view it as that expensive (relatively speaking).
263 PRE GCSE depends heavily on the second CSE pass to clean up the copies
264 we create. To make an expression reach the place where it's redundant,
265 the result of the expression is copied to a new register, and the redundant
266 expression is deleted by replacing it with this new register. Classic GCSE
267 doesn't have this problem as much as it computes the reaching defs of
268 each register in each block and thus can try to use an existing register. */
271 /* GCSE global vars. */
273 /* Note whether or not we should run jump optimization after gcse. We
274 want to do this for two cases.
276 * If we changed any jumps via cprop.
278 * If we added any labels via edge splitting. */
279 static int run_jump_opt_after_gcse;
281 /* An obstack for our working variables. */
282 static struct obstack gcse_obstack;
284 struct reg_use {rtx reg_rtx; };
286 /* Hash table of expressions. */
290 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
292 /* Index in the available expression bitmaps. */
294 /* Next entry with the same hash. */
295 struct expr *next_same_hash;
296 /* List of anticipatable occurrences in basic blocks in the function.
297 An "anticipatable occurrence" is one that is the first occurrence in the
298 basic block, the operands are not modified in the basic block prior
299 to the occurrence and the output is not used between the start of
300 the block and the occurrence. */
301 struct occr *antic_occr;
302 /* List of available occurrences in basic blocks in the function.
303 An "available occurrence" is one that is the last occurrence in the
304 basic block and the operands are not modified by following statements in
305 the basic block [including this insn]. */
306 struct occr *avail_occr;
307 /* Non-null if the computation is PRE redundant.
308 The value is the newly created pseudo-reg to record a copy of the
309 expression in all the places that reach the redundant copy. */
313 /* Occurrence of an expression.
314 There is one per basic block. If a pattern appears more than once the
315 last appearance is used [or first for anticipatable expressions]. */
319 /* Next occurrence of this expression. */
321 /* The insn that computes the expression. */
323 /* Nonzero if this [anticipatable] occurrence has been deleted. */
325 /* Nonzero if this [available] occurrence has been copied to reaching_reg. */
327 /* ??? This is mutually exclusive with deleted_p, so they could share the same bit. */
332 /* Expression and copy propagation hash tables.
333 Each hash table is an array of buckets.
334 ??? It is known that if it were an array of entries, structure elements
335 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
336 not clear whether in the final analysis a sufficient amount of memory would
337 be saved as the size of the available expression bitmaps would be larger
338 [one could build a mapping table without holes afterwards though].
339 Someday I'll perform the computation and figure it out. */
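/* Purely for illustration -- a sketch (not a function in this file) of how a
   lookup walks one bucket via `next_same_hash', mirroring what
   insert_expr_in_table and lookup_set do below:

       struct expr *e;
       unsigned int hash = hash_expr (x, mode, &do_not_record_p, table->size);
       for (e = table->table[hash]; e != NULL; e = e->next_same_hash)
         if (expr_equiv_p (e->expr, x))
           break;

   On success, e->bitmap_index identifies the expression in the dataflow
   bitmaps.  */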
344 This is an array of `expr_hash_table_size' elements. */
347 /* Size of the hash table, in elements. */
350 /* Number of hash table elements. */
351 unsigned int n_elems;
353 /* Whether the table is the expression hash table or the copy propagation one. */
357 /* Expression hash table. */
358 static struct hash_table expr_hash_table;
360 /* Copy propagation hash table. */
361 static struct hash_table set_hash_table;
363 /* Mapping of uids to cuids.
364 Only real insns get cuids. */
365 static int *uid_cuid;
367 /* Highest UID in UID_CUID. */
370 /* Get the cuid of an insn. */
371 #ifdef ENABLE_CHECKING
372 #define INSN_CUID(INSN) \
373 (gcc_assert (INSN_UID (INSN) <= max_uid), uid_cuid[INSN_UID (INSN)])
375 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
378 /* Number of cuids. */
381 /* Mapping of cuids to insns. */
382 static rtx *cuid_insn;
384 /* Get insn from cuid. */
385 #define CUID_INSN(CUID) (cuid_insn[CUID])
387 /* Maximum register number in function prior to doing gcse + 1.
388 Registers created during this pass have regno >= max_gcse_regno.
389 This is named with "gcse" to not collide with global of same name. */
390 static unsigned int max_gcse_regno;
392 /* Table of registers that are modified.
394 For each register, each element is a list of places where the pseudo-reg
397 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
398 requires knowledge of which blocks kill which regs [and thus could use
399 a bitmap instead of the lists `reg_set_table' uses].
401 `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
402 num-regs) [however perhaps it may be useful to keep the data as is]. One
403 advantage of recording things this way is that `reg_set_table' is fairly
404 sparse with respect to pseudo regs but for hard regs could be fairly dense
405 [relatively speaking]. And recording sets of pseudo-regs in lists speeds
406 up functions like compute_transp since in the case of pseudo-regs we only
407 need to iterate over the number of times a pseudo-reg is set, not over the
408 number of basic blocks [clearly there is a bit of a slow down in the cases
409 where a pseudo is set more than once in a block, however it is believed
410 that the net effect is to speed things up]. This isn't done for hard-regs
411 because recording call-clobbered hard-regs in `reg_set_table' at each
412 function call can consume a fair bit of memory, and iterating over
413 hard-regs stored this way in compute_transp will be more expensive. */
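/* An illustrative sketch (not a function in this file) of the access pattern
   described above: for a pseudo-reg, a consumer such as compute_transp only
   has to walk the places where the register is actually set,

       struct reg_set *r;
       for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
         ... look only at basic block r->bb_index ...

   rather than iterate over every basic block in the function.  */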
415 typedef struct reg_set
417 /* The next setting of this register. */
418 struct reg_set *next;
419 /* The index of the block where it was set. */
423 static reg_set **reg_set_table;
425 /* Size of `reg_set_table'.
426 The table starts out at max_gcse_regno + slop, and is enlarged as necessary. */
428 static int reg_set_table_size;
430 /* Amount to grow `reg_set_table' by when it's full. */
431 #define REG_SET_TABLE_SLOP 100
433 /* This is a list of expressions which are MEMs and will be used by load or store motion.
435 Load motion tracks MEMs which aren't killed by
436 anything except itself. (i.e., loads and stores to a single location).
437 We can then allow movement of these MEM refs with a little special
438 allowance. (all stores copy the same value to the reaching reg used
439 for the loads). This means all values used to store into memory must have
440 no side effects so we can re-issue the setter value.
441 Store Motion uses this structure as an expression table to track stores
442 which look interesting, and might be moveable towards the exit block. */
446 struct expr * expr; /* Gcse expression reference for LM. */
447 rtx pattern; /* Pattern of this mem. */
448 rtx pattern_regs; /* List of registers mentioned by the mem. */
449 rtx loads; /* INSN list of loads seen. */
450 rtx stores; /* INSN list of stores seen. */
451 struct ls_expr * next; /* Next in the list. */
452 int invalid; /* Invalid for some reason. */
453 int index; /* If it maps to a bitmap index. */
454 unsigned int hash_index; /* Index when in a hash table. */
455 rtx reaching_reg; /* Register to use when re-writing. */
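/* A made-up illustration (not from any testcase) of the kind of MEM this
   table tracks: every store to the location stores the same pseudo, so the
   loads can later be rewritten to read the reaching register instead of
   memory, roughly

       *p = r5;  ...  r7 = *p;    ===>    r9 = r5; *p = r9;  ...  r7 = r9;

   where r9 plays the role of `reaching_reg' above.  */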
458 /* Array of implicit set patterns indexed by basic block index. */
459 static rtx *implicit_sets;
461 /* Head of the list of load/store memory refs. */
462 static struct ls_expr * pre_ldst_mems = NULL;
464 /* Hashtable for the load/store memory refs. */
465 static htab_t pre_ldst_table = NULL;
467 /* Bitmap containing one bit for each register in the program.
468 Used when performing GCSE to track which registers have been set since
469 the start of the basic block. */
470 static regset reg_set_bitmap;
472 /* For each block, a bitmap of registers set in the block.
473 This is used by compute_transp.
474 It is computed during hash table computation and not by compute_sets
475 as it includes registers added since the last pass (or between cprop and
476 gcse) and it's currently not easy to realloc sbitmap vectors. */
477 static sbitmap *reg_set_in_block;
479 /* Array, indexed by basic block number for a list of insns which modify
480 memory within that block. */
481 static rtx * modify_mem_list;
482 static bitmap modify_mem_list_set;
484 /* This array parallels modify_mem_list, but is kept canonicalized. */
485 static rtx * canon_modify_mem_list;
487 /* Bitmap indexed by block numbers to record which blocks contain function calls. */
489 static bitmap blocks_with_calls;
491 /* Various variables for statistics gathering. */
493 /* Memory used in a pass.
494 This isn't intended to be absolutely precise. Its intent is only
495 to keep an eye on memory usage. */
496 static int bytes_used;
498 /* GCSE substitutions made. */
499 static int gcse_subst_count;
500 /* Number of copy instructions created. */
501 static int gcse_create_count;
502 /* Number of local constants propagated. */
503 static int local_const_prop_count;
504 /* Number of local copies propagated. */
505 static int local_copy_prop_count;
506 /* Number of global constants propagated. */
507 static int global_const_prop_count;
508 /* Number of global copies propagated. */
509 static int global_copy_prop_count;
511 /* For available exprs */
512 static sbitmap *ae_kill, *ae_gen;
514 static void compute_can_copy (void);
515 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
516 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
517 static void *grealloc (void *, size_t);
518 static void *gcse_alloc (unsigned long);
519 static void alloc_gcse_mem (void);
520 static void free_gcse_mem (void);
521 static void alloc_reg_set_mem (int);
522 static void free_reg_set_mem (void);
523 static void record_one_set (int, rtx);
524 static void record_set_info (rtx, rtx, void *);
525 static void compute_sets (void);
526 static void hash_scan_insn (rtx, struct hash_table *, int);
527 static void hash_scan_set (rtx, rtx, struct hash_table *);
528 static void hash_scan_clobber (rtx, rtx, struct hash_table *);
529 static void hash_scan_call (rtx, rtx, struct hash_table *);
530 static int want_to_gcse_p (rtx);
531 static bool can_assign_to_reg_p (rtx);
532 static bool gcse_constant_p (rtx);
533 static int oprs_unchanged_p (rtx, rtx, int);
534 static int oprs_anticipatable_p (rtx, rtx);
535 static int oprs_available_p (rtx, rtx);
536 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int,
537 struct hash_table *);
538 static void insert_set_in_table (rtx, rtx, struct hash_table *);
539 static unsigned int hash_expr (rtx, enum machine_mode, int *, int);
540 static unsigned int hash_set (int, int);
541 static int expr_equiv_p (rtx, rtx);
542 static void record_last_reg_set_info (rtx, int);
543 static void record_last_mem_set_info (rtx);
544 static void record_last_set_info (rtx, rtx, void *);
545 static void compute_hash_table (struct hash_table *);
546 static void alloc_hash_table (int, struct hash_table *, int);
547 static void free_hash_table (struct hash_table *);
548 static void compute_hash_table_work (struct hash_table *);
549 static void dump_hash_table (FILE *, const char *, struct hash_table *);
550 static struct expr *lookup_set (unsigned int, struct hash_table *);
551 static struct expr *next_set (unsigned int, struct expr *);
552 static void reset_opr_set_tables (void);
553 static int oprs_not_set_p (rtx, rtx);
554 static void mark_call (rtx);
555 static void mark_set (rtx, rtx);
556 static void mark_clobber (rtx, rtx);
557 static void mark_oprs_set (rtx);
558 static void alloc_cprop_mem (int, int);
559 static void free_cprop_mem (void);
560 static void compute_transp (rtx, int, sbitmap *, int);
561 static void compute_transpout (void);
562 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
563 struct hash_table *);
564 static void compute_cprop_data (void);
565 static void find_used_regs (rtx *, void *);
566 static int try_replace_reg (rtx, rtx, rtx);
567 static struct expr *find_avail_set (int, rtx);
568 static int cprop_jump (basic_block, rtx, rtx, rtx, rtx);
569 static void mems_conflict_for_gcse_p (rtx, rtx, void *);
570 static int load_killed_in_block_p (basic_block, int, rtx, int);
571 static void canon_list_insert (rtx, rtx, void *);
572 static int cprop_insn (rtx, int);
573 static int cprop (int);
574 static void find_implicit_sets (void);
575 static int one_cprop_pass (int, bool, bool);
576 static bool constprop_register (rtx, rtx, rtx, bool);
577 static struct expr *find_bypass_set (int, int);
578 static bool reg_killed_on_edge (rtx, edge);
579 static int bypass_block (basic_block, rtx, rtx);
580 static int bypass_conditional_jumps (void);
581 static void alloc_pre_mem (int, int);
582 static void free_pre_mem (void);
583 static void compute_pre_data (void);
584 static int pre_expr_reaches_here_p (basic_block, struct expr *,
586 static void insert_insn_end_bb (struct expr *, basic_block, int);
587 static void pre_insert_copy_insn (struct expr *, rtx);
588 static void pre_insert_copies (void);
589 static int pre_delete (void);
590 static int pre_gcse (void);
591 static int one_pre_gcse_pass (int);
592 static void add_label_notes (rtx, rtx);
593 static void alloc_code_hoist_mem (int, int);
594 static void free_code_hoist_mem (void);
595 static void compute_code_hoist_vbeinout (void);
596 static void compute_code_hoist_data (void);
597 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *);
598 static void hoist_code (void);
599 static int one_code_hoisting_pass (void);
600 static rtx process_insert_insn (struct expr *);
601 static int pre_edge_insert (struct edge_list *, struct expr **);
602 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
603 basic_block, char *);
604 static struct ls_expr * ldst_entry (rtx);
605 static void free_ldst_entry (struct ls_expr *);
606 static void free_ldst_mems (void);
607 static void print_ldst_list (FILE *);
608 static struct ls_expr * find_rtx_in_ldst (rtx);
609 static int enumerate_ldsts (void);
610 static inline struct ls_expr * first_ls_expr (void);
611 static inline struct ls_expr * next_ls_expr (struct ls_expr *);
612 static int simple_mem (rtx);
613 static void invalidate_any_buried_refs (rtx);
614 static void compute_ld_motion_mems (void);
615 static void trim_ld_motion_mems (void);
616 static void update_ld_motion_stores (struct expr *);
617 static void reg_set_info (rtx, rtx, void *);
618 static void reg_clear_last_set (rtx, rtx, void *);
619 static bool store_ops_ok (rtx, int *);
620 static rtx extract_mentioned_regs (rtx);
621 static rtx extract_mentioned_regs_helper (rtx, rtx);
622 static void find_moveable_store (rtx, int *, int *);
623 static int compute_store_table (void);
624 static bool load_kills_store (rtx, rtx, int);
625 static bool find_loads (rtx, rtx, int);
626 static bool store_killed_in_insn (rtx, rtx, rtx, int);
627 static bool store_killed_after (rtx, rtx, rtx, basic_block, int *, rtx *);
628 static bool store_killed_before (rtx, rtx, rtx, basic_block, int *);
629 static void build_store_vectors (void);
630 static void insert_insn_start_bb (rtx, basic_block);
631 static int insert_store (struct ls_expr *, edge);
632 static void remove_reachable_equiv_notes (basic_block, struct ls_expr *);
633 static void replace_store_insn (rtx, rtx, basic_block, struct ls_expr *);
634 static void delete_store (struct ls_expr *, basic_block);
635 static void free_store_memory (void);
636 static void store_motion (void);
637 static void free_insn_expr_list_list (rtx *);
638 static void clear_modify_mem_tables (void);
639 static void free_modify_mem_tables (void);
640 static rtx gcse_emit_move_after (rtx, rtx, rtx);
641 static void local_cprop_find_used_regs (rtx *, void *);
642 static bool do_local_cprop (rtx, rtx, bool, rtx *);
643 static bool adjust_libcall_notes (rtx, rtx, rtx, rtx *);
644 static void local_cprop_pass (bool);
645 static bool is_too_expensive (const char *);
648 /* Entry point for global common subexpression elimination.
649 F is the first instruction in the function. Return nonzero if a
653 gcse_main (rtx f ATTRIBUTE_UNUSED
)
656 /* Bytes used at start of pass. */
657 int initial_bytes_used
;
658 /* Maximum number of bytes used by a pass. */
660 /* Point to release obstack data from for each pass. */
661 char *gcse_obstack_bottom
;
663 /* We do not construct an accurate cfg in functions which call
664 setjmp, so just punt to be safe. */
665 if (current_function_calls_setjmp
)
668 /* Assume that we do not need to run jump optimizations after gcse. */
669 run_jump_opt_after_gcse
= 0;
671 /* Identify the basic block information for this function, including
672 successors and predecessors. */
673 max_gcse_regno
= max_reg_num ();
676 dump_flow_info (dump_file
, dump_flags
);
678 /* Return if there's nothing to do, or it is too expensive. */
679 if (n_basic_blocks
<= NUM_FIXED_BLOCKS
+ 1
680 || is_too_expensive (_("GCSE disabled")))
683 gcc_obstack_init (&gcse_obstack
);
687 init_alias_analysis ();
688 /* Record where pseudo-registers are set. This data is kept accurate
689 during each pass. ??? We could also record hard-reg information here
690 [since it's unchanging], however it is currently done during hash table computation.
693 It may be tempting to compute MEM set information here too, but MEM sets
694 will be subject to code motion one day and thus we need to compute
695 information about memory sets when we build the hash tables. */
697 alloc_reg_set_mem (max_gcse_regno
);
701 initial_bytes_used
= bytes_used
;
703 gcse_obstack_bottom
= gcse_alloc (1);
705 while (changed
&& pass
< MAX_GCSE_PASSES
)
709 fprintf (dump_file
, "GCSE pass %d\n\n", pass
+ 1);
711 /* Initialize bytes_used to the space for the pred/succ lists,
712 and the reg_set_table data. */
713 bytes_used
= initial_bytes_used
;
715 /* Each pass may create new registers, so recalculate each time. */
716 max_gcse_regno
= max_reg_num ();
720 /* Don't allow constant propagation to modify jumps this pass. */
722 timevar_push (TV_CPROP1
);
723 changed
= one_cprop_pass (pass
+ 1, false, false);
724 timevar_pop (TV_CPROP1
);
730 timevar_push (TV_PRE
);
731 changed
|= one_pre_gcse_pass (pass
+ 1);
732 /* We may have just created new basic blocks. Release and
733 recompute various things which are sized on the number of basic blocks. */
737 free_modify_mem_tables ();
738 modify_mem_list
= gcalloc (last_basic_block
, sizeof (rtx
));
739 canon_modify_mem_list
= gcalloc (last_basic_block
, sizeof (rtx
));
742 alloc_reg_set_mem (max_reg_num ());
744 run_jump_opt_after_gcse
= 1;
745 timevar_pop (TV_PRE
);
748 if (max_pass_bytes
< bytes_used
)
749 max_pass_bytes
= bytes_used
;
751 /* Free up memory, then reallocate for code hoisting. We can
752 not re-use the existing allocated memory because the tables
753 will not have info for the insns or registers created by
754 partial redundancy elimination. */
757 /* It does not make sense to run code hoisting unless we are optimizing
758 for code size -- it rarely makes programs faster, and can make
759 them bigger if we did partial redundancy elimination (when optimizing
760 for space, we don't run the partial redundancy algorithms). */
763 timevar_push (TV_HOIST
);
764 max_gcse_regno
= max_reg_num ();
766 changed
|= one_code_hoisting_pass ();
769 if (max_pass_bytes
< bytes_used
)
770 max_pass_bytes
= bytes_used
;
771 timevar_pop (TV_HOIST
);
776 fprintf (dump_file
, "\n");
780 obstack_free (&gcse_obstack
, gcse_obstack_bottom
);
784 /* Do one last pass of copy propagation, including cprop into
785 conditional jumps. */
787 max_gcse_regno
= max_reg_num ();
789 /* This time, go ahead and allow cprop to alter jumps. */
790 timevar_push (TV_CPROP2
);
791 one_cprop_pass (pass
+ 1, true, false);
792 timevar_pop (TV_CPROP2
);
797 fprintf (dump_file
, "GCSE of %s: %d basic blocks, ",
798 current_function_name (), n_basic_blocks
);
799 fprintf (dump_file
, "%d pass%s, %d bytes\n\n",
800 pass
, pass
> 1 ? "es" : "", max_pass_bytes
);
803 obstack_free (&gcse_obstack
, NULL
);
806 /* We are finished with alias. */
807 end_alias_analysis ();
808 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
810 if (!optimize_size
&& flag_gcse_sm
)
812 timevar_push (TV_LSM
);
814 timevar_pop (TV_LSM
);
817 /* Record where pseudo-registers are set. */
818 return run_jump_opt_after_gcse
;
821 /* Misc. utilities. */
823 /* Nonzero for each mode that supports (set (reg) (reg)).
824 This is trivially true for integer and floating point values.
825 It may or may not be true for condition codes. */
826 static char can_copy
[(int) NUM_MACHINE_MODES
];
828 /* Compute which modes support reg/reg copy operations. */
831 compute_can_copy (void)
834 #ifndef AVOID_CCMODE_COPIES
837 memset (can_copy
, 0, NUM_MACHINE_MODES
);
840 for (i
= 0; i
< NUM_MACHINE_MODES
; i
++)
841 if (GET_MODE_CLASS (i
) == MODE_CC
)
843 #ifdef AVOID_CCMODE_COPIES
846 reg
= gen_rtx_REG ((enum machine_mode
) i
, LAST_VIRTUAL_REGISTER
+ 1);
847 insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, reg
));
848 if (recog (PATTERN (insn
), insn
, NULL
) >= 0)
858 /* Returns whether the mode supports reg/reg copy operations. */
861 can_copy_p (enum machine_mode mode
)
863 static bool can_copy_init_p
= false;
865 if (! can_copy_init_p
)
868 can_copy_init_p
= true;
871 return can_copy
[mode
] != 0;
874 /* Cover function to xmalloc to record bytes allocated. */
877 gmalloc (size_t size
)
880 return xmalloc (size
);
883 /* Cover function to xcalloc to record bytes allocated. */
886 gcalloc (size_t nelem
, size_t elsize
)
888 bytes_used
+= nelem
* elsize
;
889 return xcalloc (nelem
, elsize
);
892 /* Cover function to xrealloc.
893 We don't record the additional size since we don't know it.
894 It won't affect memory usage stats much anyway. */
897 grealloc (void *ptr
, size_t size
)
899 return xrealloc (ptr
, size
);
902 /* Cover function to obstack_alloc. */
905 gcse_alloc (unsigned long size
)
908 return obstack_alloc (&gcse_obstack
, size
);
911 /* Allocate memory for the cuid mapping array,
912 and reg/memory set tracking tables.
914 This is called at the start of each pass. */
917 alloc_gcse_mem (void)
923 /* Find the largest UID and create a mapping from UIDs to CUIDs.
924 CUIDs are like UIDs except they increase monotonically, have no gaps,
925 and only apply to real insns.
926 (Actually, there are gaps, for insns that are not inside a basic block,
927 but we should never see those anyway, so this is OK.) */
929 max_uid
= get_max_uid ();
930 uid_cuid
= gcalloc (max_uid
+ 1, sizeof (int));
933 FOR_BB_INSNS (bb
, insn
)
936 uid_cuid
[INSN_UID (insn
)] = i
++;
938 uid_cuid
[INSN_UID (insn
)] = i
;
941 /* Create a table mapping cuids to insns. */
944 cuid_insn
= gcalloc (max_cuid
+ 1, sizeof (rtx
));
947 FOR_BB_INSNS (bb
, insn
)
949 CUID_INSN (i
++) = insn
;
951 /* Allocate vars to track sets of regs. */
952 reg_set_bitmap
= BITMAP_ALLOC (NULL
);
954 /* Allocate vars to track sets of regs, memory per block. */
955 reg_set_in_block
= sbitmap_vector_alloc (last_basic_block
, max_gcse_regno
);
956 /* Allocate array to keep a list of insns which modify memory in each
958 modify_mem_list
= gcalloc (last_basic_block
, sizeof (rtx
));
959 canon_modify_mem_list
= gcalloc (last_basic_block
, sizeof (rtx
));
960 modify_mem_list_set
= BITMAP_ALLOC (NULL
);
961 blocks_with_calls
= BITMAP_ALLOC (NULL
);
964 /* Free memory allocated by alloc_gcse_mem. */
972 BITMAP_FREE (reg_set_bitmap
);
974 sbitmap_vector_free (reg_set_in_block
);
975 free_modify_mem_tables ();
976 BITMAP_FREE (modify_mem_list_set
);
977 BITMAP_FREE (blocks_with_calls
);
980 /* Compute the local properties of each recorded expression.
982 Local properties are those that are defined by the block, irrespective of other blocks.
985 An expression is transparent in a block if its operands are not modified in the block.
988 An expression is computed (locally available) in a block if it is computed
989 at least once and the expression would contain the same value if the
990 computation was moved to the end of the block.
992 An expression is locally anticipatable in a block if it is computed at
993 least once and the expression would contain the same value if the computation
994 was moved to the beginning of the block.
996 We call this routine for cprop, pre and code hoisting. They all compute
997 basically the same information and thus can easily share this code.
999 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1000 properties. If NULL, then it is not necessary to compute or record that
1001 particular property.
1003 TABLE controls which hash table to look at. If it is the set hash table,
1004 additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1008 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
1009 struct hash_table *table)
1013 /* Initialize any bitmaps that were passed in. */
1017 sbitmap_vector_zero (transp
, last_basic_block
);
1019 sbitmap_vector_ones (transp
, last_basic_block
);
1023 sbitmap_vector_zero (comp
, last_basic_block
);
1025 sbitmap_vector_zero (antloc
, last_basic_block
);
1027 for (i
= 0; i
< table
->size
; i
++)
1031 for (expr
= table
->table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
1033 int indx
= expr
->bitmap_index
;
1036 /* The expression is transparent in this block if it is not killed.
1037 We start by assuming all are transparent [none are killed], and
1038 then reset the bits for those that are. */
1040 compute_transp (expr
->expr
, indx
, transp
, table
->set_p
);
1042 /* The occurrences recorded in antic_occr are exactly those that
1043 we want to set to nonzero in ANTLOC. */
1045 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
1047 SET_BIT (antloc
[BLOCK_NUM (occr
->insn
)], indx
);
1049 /* While we're scanning the table, this is a good place to
1051 occr
->deleted_p
= 0;
1054 /* The occurrences recorded in avail_occr are exactly those that
1055 we want to set to nonzero in COMP. */
1057 for (occr
= expr
->avail_occr
; occr
!= NULL
; occr
= occr
->next
)
1059 SET_BIT (comp
[BLOCK_NUM (occr
->insn
)], indx
);
1061 /* While we're scanning the table, this is a good place to
1066 /* While we're scanning the table, this is a good place to
1068 expr
->reaching_reg
= 0;
1073 /* Register set information.
1075 `reg_set_table' records where each register is set or otherwise modified. */
1078 static struct obstack reg_set_obstack;
1081 alloc_reg_set_mem (int n_regs
)
1083 reg_set_table_size
= n_regs
+ REG_SET_TABLE_SLOP
;
1084 reg_set_table
= gcalloc (reg_set_table_size
, sizeof (struct reg_set
*));
1086 gcc_obstack_init (®_set_obstack
);
1090 free_reg_set_mem (void)
1092 free (reg_set_table
);
1093 obstack_free (®_set_obstack
, NULL
);
1096 /* Record REGNO in the reg_set table. */
1099 record_one_set (int regno
, rtx insn
)
1101 /* Allocate a new reg_set element and link it onto the list. */
1102 struct reg_set
*new_reg_info
;
1104 /* If the table isn't big enough, enlarge it. */
1105 if (regno
>= reg_set_table_size
)
1107 int new_size
= regno
+ REG_SET_TABLE_SLOP
;
1109 reg_set_table
= grealloc (reg_set_table
,
1110 new_size
* sizeof (struct reg_set
*));
1111 memset (reg_set_table
+ reg_set_table_size
, 0,
1112 (new_size
- reg_set_table_size
) * sizeof (struct reg_set
*));
1113 reg_set_table_size
= new_size
;
1116 new_reg_info
= obstack_alloc (®_set_obstack
, sizeof (struct reg_set
));
1117 bytes_used
+= sizeof (struct reg_set
);
1118 new_reg_info
->bb_index
= BLOCK_NUM (insn
);
1119 new_reg_info
->next
= reg_set_table
[regno
];
1120 reg_set_table
[regno
] = new_reg_info
;
1123 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1124 an insn. The DATA is really the instruction in which the SET is occurring. */
1128 record_set_info (rtx dest
, rtx setter ATTRIBUTE_UNUSED
, void *data
)
1130 rtx record_set_insn
= (rtx
) data
;
1132 if (REG_P (dest
) && REGNO (dest
) >= FIRST_PSEUDO_REGISTER
)
1133 record_one_set (REGNO (dest
), record_set_insn
);
1136 /* Scan the function and record each set of each pseudo-register.
1138 This is called once, at the start of the gcse pass. See the comments for
1139 `reg_set_table' for further documentation. */
1148 FOR_BB_INSNS (bb
, insn
)
1150 note_stores (PATTERN (insn
), record_set_info
, insn
);
1153 /* Hash table support. */
1155 struct reg_avail_info
1157 basic_block last_bb
;
1162 static struct reg_avail_info
*reg_avail_info
;
1163 static basic_block current_bb
;
1166 /* See whether X, the source of a set, is something we want to consider for GCSE. */
1170 want_to_gcse_p (rtx x
)
1173 /* On register stack architectures, don't GCSE constants from the
1174 constant pool, as the benefits are often swamped by the overhead
1175 of shuffling the register stack between basic blocks. */
1176 if (IS_STACK_MODE (GET_MODE (x
)))
1177 x
= avoid_constant_pool_reference (x
);
1180 switch (GET_CODE (x
))
1191 return can_assign_to_reg_p (x
);
1195 /* Used internally by can_assign_to_reg_p. */
1197 static GTY(()) rtx test_insn
;
1199 /* Return true if we can assign X to a pseudo register. */
1202 can_assign_to_reg_p (rtx x
)
1204 int num_clobbers
= 0;
1207 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
1208 if (general_operand (x
, GET_MODE (x
)))
1210 else if (GET_MODE (x
) == VOIDmode
)
1213 /* Otherwise, check if we can make a valid insn from it. First initialize
1214 our test insn if we haven't already. */
1218 = make_insn_raw (gen_rtx_SET (VOIDmode
,
1219 gen_rtx_REG (word_mode
,
1220 FIRST_PSEUDO_REGISTER
* 2),
1222 NEXT_INSN (test_insn
) = PREV_INSN (test_insn
) = 0;
1225 /* Now make an insn like the one we would make when GCSE'ing and see if
1227 PUT_MODE (SET_DEST (PATTERN (test_insn
)), GET_MODE (x
));
1228 SET_SRC (PATTERN (test_insn
)) = x
;
1229 return ((icode
= recog (PATTERN (test_insn
), test_insn
, &num_clobbers
)) >= 0
1230 && (num_clobbers
== 0 || ! added_clobbers_hard_reg_p (icode
)));
1233 /* Return nonzero if the operands of expression X are unchanged from the
1234 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1235 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1238 oprs_unchanged_p (rtx x
, rtx insn
, int avail_p
)
1247 code
= GET_CODE (x
);
1252 struct reg_avail_info
*info
= ®_avail_info
[REGNO (x
)];
1254 if (info
->last_bb
!= current_bb
)
1257 return info
->last_set
< INSN_CUID (insn
);
1259 return info
->first_set
>= INSN_CUID (insn
);
1263 if (load_killed_in_block_p (current_bb
, INSN_CUID (insn
),
1267 return oprs_unchanged_p (XEXP (x
, 0), insn
, avail_p
);
1293 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
1297 /* If we are about to do the last recursive call needed at this
1298 level, change it into iteration. This function is called enough
1301 return oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
);
1303 else if (! oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
))
1306 else if (fmt
[i
] == 'E')
1307 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1308 if (! oprs_unchanged_p (XVECEXP (x
, i
, j
), insn
, avail_p
))
1315 /* Used for communication between mems_conflict_for_gcse_p and
1316 load_killed_in_block_p. Nonzero if mems_conflict_for_gcse_p finds a
1317 conflict between two memory references. */
1318 static int gcse_mems_conflict_p
;
1320 /* Used for communication between mems_conflict_for_gcse_p and
1321 load_killed_in_block_p. A memory reference for a load instruction,
1322 mems_conflict_for_gcse_p will see if a memory store conflicts with
1323 this memory load. */
1324 static rtx gcse_mem_operand
;
1326 /* DEST is the output of an instruction. If it is a memory reference, and
1327 possibly conflicts with the load found in gcse_mem_operand, then set
1328 gcse_mems_conflict_p to a nonzero value. */
1331 mems_conflict_for_gcse_p (rtx dest
, rtx setter ATTRIBUTE_UNUSED
,
1332 void *data ATTRIBUTE_UNUSED
)
1334 while (GET_CODE (dest
) == SUBREG
1335 || GET_CODE (dest
) == ZERO_EXTRACT
1336 || GET_CODE (dest
) == STRICT_LOW_PART
)
1337 dest
= XEXP (dest
, 0);
1339 /* If DEST is not a MEM, then it will not conflict with the load. Note
1340 that function calls are assumed to clobber memory, but are handled elsewhere. */
1345 /* If we are setting a MEM in our list of specially recognized MEMs,
1346 don't mark as killed this time. */
1348 if (expr_equiv_p (dest
, gcse_mem_operand
) && pre_ldst_mems
!= NULL
)
1350 if (!find_rtx_in_ldst (dest
))
1351 gcse_mems_conflict_p
= 1;
1355 if (true_dependence (dest
, GET_MODE (dest
), gcse_mem_operand
,
1357 gcse_mems_conflict_p
= 1;
1360 /* Return nonzero if the expression in X (a memory reference) is killed
1361 in block BB before or after the insn with the CUID in UID_LIMIT.
1362 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills before UID_LIMIT.
1365 To check the entire block, set UID_LIMIT to max_uid + 1 and AVAIL_P to 0. */
1369 load_killed_in_block_p (basic_block bb
, int uid_limit
, rtx x
, int avail_p
)
1371 rtx list_entry
= modify_mem_list
[bb
->index
];
1373 /* If this is a readonly then we aren't going to be changing it. */
1374 if (MEM_READONLY_P (x
))
1380 /* Ignore entries in the list that do not apply. */
1382 && INSN_CUID (XEXP (list_entry
, 0)) < uid_limit
)
1384 && INSN_CUID (XEXP (list_entry
, 0)) > uid_limit
))
1386 list_entry
= XEXP (list_entry
, 1);
1390 setter
= XEXP (list_entry
, 0);
1392 /* If SETTER is a call everything is clobbered. Note that calls
1393 to pure functions are never put on the list, so we need not
1394 worry about them. */
1395 if (CALL_P (setter
))
1398 /* SETTER must be an INSN of some kind that sets memory. Call
1399 note_stores to examine each hunk of memory that is modified.
1401 The note_stores interface is pretty limited, so we have to
1402 communicate via global variables. Yuk. */
1403 gcse_mem_operand
= x
;
1404 gcse_mems_conflict_p
= 0;
1405 note_stores (PATTERN (setter
), mems_conflict_for_gcse_p
, NULL
);
1406 if (gcse_mems_conflict_p
)
1408 list_entry
= XEXP (list_entry
, 1);
1413 /* Return nonzero if the operands of expression X are unchanged from
1414 the start of INSN's basic block up to but not including INSN. */
1417 oprs_anticipatable_p (rtx x
, rtx insn
)
1419 return oprs_unchanged_p (x
, insn
, 0);
1422 /* Return nonzero if the operands of expression X are unchanged from
1423 INSN to the end of INSN's basic block. */
1426 oprs_available_p (rtx x
, rtx insn
)
1428 return oprs_unchanged_p (x
, insn
, 1);
1431 /* Hash expression X.
1433 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1434 indicating if a volatile operand is found or if the expression contains
1435 something we don't want to insert in the table. HASH_TABLE_SIZE is
1436 the current size of the hash table to be probed. */
1439 hash_expr (rtx x
, enum machine_mode mode
, int *do_not_record_p
,
1440 int hash_table_size
)
1444 *do_not_record_p
= 0;
1446 hash
= hash_rtx (x
, mode
, do_not_record_p
,
1447 NULL
, /*have_reg_qty=*/false);
1448 return hash
% hash_table_size
;
1451 /* Hash a set of register REGNO.
1453 Sets are hashed on the register that is set. This simplifies the PRE copy
1456 ??? May need to make things more elaborate. Later, as necessary. */
1459 hash_set (int regno
, int hash_table_size
)
1464 return hash
% hash_table_size
;
1467 /* Return nonzero if exp1 is equivalent to exp2. */
1470 expr_equiv_p (rtx x
, rtx y
)
1472 return exp_equiv_p (x
, y
, 0, true);
1475 /* Insert expression X in INSN in the hash TABLE.
1476 If it is already present, record it as the last occurrence in INSN's basic block.
1479 MODE is the mode of the value X is being stored into.
1480 It is only used if X is a CONST_INT.
1482 ANTIC_P is nonzero if X is an anticipatable expression.
1483 AVAIL_P is nonzero if X is an available expression. */
1486 insert_expr_in_table (rtx x
, enum machine_mode mode
, rtx insn
, int antic_p
,
1487 int avail_p
, struct hash_table
*table
)
1489 int found
, do_not_record_p
;
1491 struct expr
*cur_expr
, *last_expr
= NULL
;
1492 struct occr
*antic_occr
, *avail_occr
;
1494 hash
= hash_expr (x
, mode
, &do_not_record_p
, table
->size
);
1496 /* Do not insert expression in table if it contains volatile operands,
1497 or if hash_expr determines the expression is something we don't want
1498 to or can't handle. */
1499 if (do_not_record_p
)
1502 cur_expr
= table
->table
[hash
];
1505 while (cur_expr
&& 0 == (found
= expr_equiv_p (cur_expr
->expr
, x
)))
1507 /* If the expression isn't found, save a pointer to the end of
1509 last_expr
= cur_expr
;
1510 cur_expr
= cur_expr
->next_same_hash
;
1515 cur_expr
= gcse_alloc (sizeof (struct expr
));
1516 bytes_used
+= sizeof (struct expr
);
1517 if (table
->table
[hash
] == NULL
)
1518 /* This is the first pattern that hashed to this index. */
1519 table
->table
[hash
] = cur_expr
;
1521 /* Add EXPR to end of this hash chain. */
1522 last_expr
->next_same_hash
= cur_expr
;
1524 /* Set the fields of the expr element. */
1526 cur_expr
->bitmap_index
= table
->n_elems
++;
1527 cur_expr
->next_same_hash
= NULL
;
1528 cur_expr
->antic_occr
= NULL
;
1529 cur_expr
->avail_occr
= NULL
;
1532 /* Now record the occurrence(s). */
1535 antic_occr
= cur_expr
->antic_occr
;
1537 if (antic_occr
&& BLOCK_NUM (antic_occr
->insn
) != BLOCK_NUM (insn
))
1541 /* Found another instance of the expression in the same basic block.
1542 Prefer the currently recorded one. We want the first one in the
1543 block and the block is scanned from start to end. */
1544 ; /* nothing to do */
1547 /* First occurrence of this expression in this basic block. */
1548 antic_occr
= gcse_alloc (sizeof (struct occr
));
1549 bytes_used
+= sizeof (struct occr
);
1550 antic_occr
->insn
= insn
;
1551 antic_occr
->next
= cur_expr
->antic_occr
;
1552 antic_occr
->deleted_p
= 0;
1553 cur_expr
->antic_occr
= antic_occr
;
1559 avail_occr
= cur_expr
->avail_occr
;
1561 if (avail_occr
&& BLOCK_NUM (avail_occr
->insn
) == BLOCK_NUM (insn
))
1563 /* Found another instance of the expression in the same basic block.
1564 Prefer this occurrence to the currently recorded one. We want
1565 the last one in the block and the block is scanned from start
1567 avail_occr
->insn
= insn
;
1571 /* First occurrence of this expression in this basic block. */
1572 avail_occr
= gcse_alloc (sizeof (struct occr
));
1573 bytes_used
+= sizeof (struct occr
);
1574 avail_occr
->insn
= insn
;
1575 avail_occr
->next
= cur_expr
->avail_occr
;
1576 avail_occr
->deleted_p
= 0;
1577 cur_expr
->avail_occr
= avail_occr
;
1582 /* Insert pattern X in INSN in the hash table.
1583 X is a SET of a reg to either another reg or a constant.
1584 If it is already present, record it as the last occurrence in INSN's basic block. */
1588 insert_set_in_table (rtx x
, rtx insn
, struct hash_table
*table
)
1592 struct expr
*cur_expr
, *last_expr
= NULL
;
1593 struct occr
*cur_occr
;
1595 gcc_assert (GET_CODE (x
) == SET
&& REG_P (SET_DEST (x
)));
1597 hash
= hash_set (REGNO (SET_DEST (x
)), table
->size
);
1599 cur_expr
= table
->table
[hash
];
1602 while (cur_expr
&& 0 == (found
= expr_equiv_p (cur_expr
->expr
, x
)))
1604 /* If the expression isn't found, save a pointer to the end of
1606 last_expr
= cur_expr
;
1607 cur_expr
= cur_expr
->next_same_hash
;
1612 cur_expr
= gcse_alloc (sizeof (struct expr
));
1613 bytes_used
+= sizeof (struct expr
);
1614 if (table
->table
[hash
] == NULL
)
1615 /* This is the first pattern that hashed to this index. */
1616 table
->table
[hash
] = cur_expr
;
1618 /* Add EXPR to end of this hash chain. */
1619 last_expr
->next_same_hash
= cur_expr
;
1621 /* Set the fields of the expr element.
1622 We must copy X because it can be modified when copy propagation is
1623 performed on its operands. */
1624 cur_expr
->expr
= copy_rtx (x
);
1625 cur_expr
->bitmap_index
= table
->n_elems
++;
1626 cur_expr
->next_same_hash
= NULL
;
1627 cur_expr
->antic_occr
= NULL
;
1628 cur_expr
->avail_occr
= NULL
;
1631 /* Now record the occurrence. */
1632 cur_occr
= cur_expr
->avail_occr
;
1634 if (cur_occr
&& BLOCK_NUM (cur_occr
->insn
) == BLOCK_NUM (insn
))
1636 /* Found another instance of the expression in the same basic block.
1637 Prefer this occurrence to the currently recorded one. We want
1638 the last one in the block and the block is scanned from start
1640 cur_occr
->insn
= insn
;
1644 /* First occurrence of this expression in this basic block. */
1645 cur_occr
= gcse_alloc (sizeof (struct occr
));
1646 bytes_used
+= sizeof (struct occr
);
1648 cur_occr
->insn
= insn
;
1649 cur_occr
->next
= cur_expr
->avail_occr
;
1650 cur_occr
->deleted_p
= 0;
1651 cur_expr
->avail_occr
= cur_occr
;
1655 /* Determine whether the rtx X should be treated as a constant for
1656 the purposes of GCSE's constant propagation. */
1659 gcse_constant_p (rtx x
)
1661 /* Consider a COMPARE of two integers constant. */
1662 if (GET_CODE (x
) == COMPARE
1663 && GET_CODE (XEXP (x
, 0)) == CONST_INT
1664 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
1667 /* Consider a COMPARE of the same registers a constant
1668 if they are not floating point registers. */
1669 if (GET_CODE(x
) == COMPARE
1670 && REG_P (XEXP (x
, 0)) && REG_P (XEXP (x
, 1))
1671 && REGNO (XEXP (x
, 0)) == REGNO (XEXP (x
, 1))
1672 && ! FLOAT_MODE_P (GET_MODE (XEXP (x
, 0)))
1673 && ! FLOAT_MODE_P (GET_MODE (XEXP (x
, 1))))
1676 return CONSTANT_P (x
);
1679 /* Scan pattern PAT of INSN and add an entry to the hash TABLE (set or expression entry). */
1683 hash_scan_set (rtx pat
, rtx insn
, struct hash_table
*table
)
1685 rtx src
= SET_SRC (pat
);
1686 rtx dest
= SET_DEST (pat
);
1689 if (GET_CODE (src
) == CALL
)
1690 hash_scan_call (src
, insn
, table
);
1692 else if (REG_P (dest
))
1694 unsigned int regno
= REGNO (dest
);
1697 /* See if a REG_NOTE shows this equivalent to a simpler expression.
1698 This allows us to do a single GCSE pass and still eliminate
1699 redundant constants, addresses or other expressions that are
1700 constructed with multiple instructions. */
1701 note
= find_reg_equal_equiv_note (insn
);
1704 ? gcse_constant_p (XEXP (note
, 0))
1705 : want_to_gcse_p (XEXP (note
, 0))))
1706 src
= XEXP (note
, 0), pat
= gen_rtx_SET (VOIDmode
, dest
, src
);
1708 /* Only record sets of pseudo-regs in the hash table. */
1710 && regno
>= FIRST_PSEUDO_REGISTER
1711 /* Don't GCSE something if we can't do a reg/reg copy. */
1712 && can_copy_p (GET_MODE (dest
))
1713 /* GCSE commonly inserts instructions after the insn. We can't
1714 do that easily for EH_REGION notes so disable GCSE on these for now. */
1716 && !find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
)
1717 /* Is SET_SRC something we want to gcse? */
1718 && want_to_gcse_p (src
)
1719 /* Don't CSE a nop. */
1720 && ! set_noop_p (pat
)
1721 /* Don't GCSE if it has attached REG_EQUIV note.
1722 At this point only function parameters should have
1723 REG_EQUIV notes and if the argument slot is used somewhere
1724 explicitly, it means address of parameter has been taken,
1725 so we should not extend the lifetime of the pseudo. */
1726 && (note
== NULL_RTX
|| ! MEM_P (XEXP (note
, 0))))
1728 /* An expression is not anticipatable if its operands are
1729 modified before this insn or if this is not the only SET in
1731 int antic_p
= oprs_anticipatable_p (src
, insn
) && single_set (insn
);
1732 /* An expression is not available if its operands are
1733 subsequently modified, including this insn. It's also not
1734 available if this is a branch, because we can't insert
1735 a set after the branch. */
1736 int avail_p
= (oprs_available_p (src
, insn
)
1737 && ! JUMP_P (insn
));
1739 insert_expr_in_table (src
, GET_MODE (dest
), insn
, antic_p
, avail_p
, table
);
1742 /* Record sets for constant/copy propagation. */
1743 else if (table
->set_p
1744 && regno
>= FIRST_PSEUDO_REGISTER
1746 && REGNO (src
) >= FIRST_PSEUDO_REGISTER
1747 && can_copy_p (GET_MODE (dest
))
1748 && REGNO (src
) != regno
)
1749 || gcse_constant_p (src
))
1750 /* A copy is not available if its src or dest is subsequently
1751 modified. Here we want to search from INSN+1 on, but
1752 oprs_available_p searches from INSN on. */
1753 && (insn
== BB_END (BLOCK_FOR_INSN (insn
))
1754 || ((tmp
= next_nonnote_insn (insn
)) != NULL_RTX
1755 && oprs_available_p (pat
, tmp
))))
1756 insert_set_in_table (pat
, insn
, table
);
1758 /* In case of store we want to consider the memory value as available in
1759 the REG stored in that memory. This makes it possible to remove
1760 redundant loads due to stores to the same location. */
1761 else if (flag_gcse_las
&& REG_P (src
) && MEM_P (dest
))
1763 unsigned int regno
= REGNO (src
);
1765 /* Do not do this for constant/copy propagation. */
1767 /* Only record sets of pseudo-regs in the hash table. */
1768 && regno
>= FIRST_PSEUDO_REGISTER
1769 /* Don't GCSE something if we can't do a reg/reg copy. */
1770 && can_copy_p (GET_MODE (src
))
1771 /* GCSE commonly inserts instructions after the insn. We can't
1772 do that easily for EH_REGION notes so disable GCSE on these for now. */
1774 && ! find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
)
1775 /* Is SET_DEST something we want to gcse? */
1776 && want_to_gcse_p (dest
)
1777 /* Don't CSE a nop. */
1778 && ! set_noop_p (pat
)
1779 /* Don't GCSE if it has attached REG_EQUIV note.
1780 At this point this only function parameters should have
1781 REG_EQUIV notes and if the argument slot is used somewhere
1782 explicitly, it means address of parameter has been taken,
1783 so we should not extend the lifetime of the pseudo. */
1784 && ((note
= find_reg_note (insn
, REG_EQUIV
, NULL_RTX
)) == 0
1785 || ! MEM_P (XEXP (note
, 0))))
1787 /* Stores are never anticipatable. */
1789 /* An expression is not available if its operands are
1790 subsequently modified, including this insn. It's also not
1791 available if this is a branch, because we can't insert
1792 a set after the branch. */
1793 int avail_p
= oprs_available_p (dest
, insn
)
1796 /* Record the memory expression (DEST) in the hash table. */
1797 insert_expr_in_table (dest
, GET_MODE (dest
), insn
,
1798 antic_p
, avail_p
, table
);
1804 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED
, rtx insn ATTRIBUTE_UNUSED
,
1805 struct hash_table
*table ATTRIBUTE_UNUSED
)
1807 /* Currently nothing to do. */
1811 hash_scan_call (rtx x ATTRIBUTE_UNUSED
, rtx insn ATTRIBUTE_UNUSED
,
1812 struct hash_table
*table ATTRIBUTE_UNUSED
)
1814 /* Currently nothing to do. */
1817 /* Process INSN and add hash table entries as appropriate.
1819 Only available expressions that set a single pseudo-reg are recorded.
1821 Single sets in a PARALLEL could be handled, but it's an extra complication
1822 that isn't dealt with right now. The trick is handling the CLOBBERs that
1823 are also in the PARALLEL. Later.
1825 If SET_P is nonzero, this is for the assignment hash table,
1826 otherwise it is for the expression hash table.
1827 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1828 not record any expressions. */
1831 hash_scan_insn (rtx insn
, struct hash_table
*table
, int in_libcall_block
)
1833 rtx pat
= PATTERN (insn
);
1836 if (in_libcall_block
)
1839 /* Pick out the sets of INSN and for other forms of instructions record
1840 what's been modified. */
1842 if (GET_CODE (pat
) == SET
)
1843 hash_scan_set (pat
, insn
, table
);
1844 else if (GET_CODE (pat
) == PARALLEL
)
1845 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
1847 rtx x
= XVECEXP (pat
, 0, i
);
1849 if (GET_CODE (x
) == SET
)
1850 hash_scan_set (x
, insn
, table
);
1851 else if (GET_CODE (x
) == CLOBBER
)
1852 hash_scan_clobber (x
, insn
, table
);
1853 else if (GET_CODE (x
) == CALL
)
1854 hash_scan_call (x
, insn
, table
);
1857 else if (GET_CODE (pat
) == CLOBBER
)
1858 hash_scan_clobber (pat
, insn
, table
);
1859 else if (GET_CODE (pat
) == CALL
)
1860 hash_scan_call (pat
, insn
, table
);
static void
dump_hash_table (FILE *file, const char *name, struct hash_table *table)
{
  int i;
  /* Flattened out table, so it's printed in proper order.  */
  struct expr **flat_table;
  unsigned int *hash_val;
  struct expr *expr;

  flat_table = xcalloc (table->n_elems, sizeof (struct expr *));
  hash_val = xmalloc (table->n_elems * sizeof (unsigned int));

  for (i = 0; i < (int) table->size; i++)
    for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	flat_table[expr->bitmap_index] = expr;
	hash_val[expr->bitmap_index] = i;
      }

  fprintf (file, "%s hash table (%d buckets, %d entries)\n",
	   name, table->size, table->n_elems);

  for (i = 0; i < (int) table->n_elems; i++)
    if (flat_table[i] != 0)
      {
	expr = flat_table[i];
	fprintf (file, "Index %d (hash value %d)\n  ",
		 expr->bitmap_index, hash_val[i]);
	print_rtl (file, expr->expr);
	fprintf (file, "\n");
      }

  fprintf (file, "\n");

  free (flat_table);
  free (hash_val);
}
/* Record register first/last/block set information for REGNO in INSN.

   first_set records the first place in the block where the register
   is set and is used to compute "anticipatability".

   last_set records the last place in the block where the register
   is set and is used to compute "availability".

   last_bb records the block for which first_set and last_set are
   valid, as a quick test to invalidate them.

   reg_set_in_block records whether the register is set in the block
   and is used to compute "transparency".  */
static void
record_last_reg_set_info (rtx insn, int regno)
{
  struct reg_avail_info *info = &reg_avail_info[regno];
  int cuid = INSN_CUID (insn);

  info->last_set = cuid;
  if (info->last_bb != current_bb)
    {
      info->last_bb = current_bb;
      info->first_set = cuid;
      SET_BIT (reg_set_in_block[current_bb->index], regno);
    }
}
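/* Illustrative note, not part of the original source: given a block

     insn 10: (set (reg 100) ...)	<- first_set for reg 100
     insn 14: (set (reg 100) ...)	<- last_set for reg 100

   an expression that uses reg 100 at insn I has operands unchanged since
   the block start (and so can be anticipatable) only if I is at or before
   insn 10, and a value involving reg 100 computed at insn I survives to
   the block end (is available) only if I comes after insn 14.  */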
/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
   Note we store a pair of elements in the list, so they have to be
   taken off pairwise.  */

static void
canon_list_insert (rtx dest ATTRIBUTE_UNUSED, rtx unused1 ATTRIBUTE_UNUSED,
		   void * v_insn)
{
  rtx dest_addr, insn;
  int bb;

  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with a load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  dest_addr = get_addr (XEXP (dest, 0));
  dest_addr = canon_rtx (dest_addr);
  insn = (rtx) v_insn;
  bb = BLOCK_NUM (insn);

  canon_modify_mem_list[bb] =
    alloc_EXPR_LIST (VOIDmode, dest_addr, canon_modify_mem_list[bb]);
  canon_modify_mem_list[bb] =
    alloc_EXPR_LIST (VOIDmode, dest, canon_modify_mem_list[bb]);
}
/* Record memory modification information for INSN.  We do not actually care
   about the memory location(s) that are set, or even how they are set
   (consider a CALL_INSN).  We merely need to record which insns modify
   memory.  */

static void
record_last_mem_set_info (rtx insn)
{
  int bb = BLOCK_NUM (insn);

  /* load_killed_in_block_p will handle the case of calls clobbering
     everything.  */
  modify_mem_list[bb] = alloc_INSN_LIST (insn, modify_mem_list[bb]);
  bitmap_set_bit (modify_mem_list_set, bb);

  if (CALL_P (insn))
    {
      /* Note that traversals of this loop (other than for free-ing)
	 will break after encountering a CALL_INSN.  So, there's no
	 need to insert a pair of items, as canon_list_insert does.  */
      canon_modify_mem_list[bb] =
	alloc_INSN_LIST (insn, canon_modify_mem_list[bb]);
      bitmap_set_bit (blocks_with_calls, bb);
    }
  else
    note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
}
/* Called from compute_hash_table via note_stores to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */

static void
record_last_set_info (rtx dest, rtx setter ATTRIBUTE_UNUSED, void *data)
{
  rtx last_set_insn = (rtx) data;

  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (REG_P (dest))
    record_last_reg_set_info (last_set_insn, REGNO (dest));
  else if (MEM_P (dest)
	   /* Ignore pushes, they clobber nothing.  */
	   && ! push_operand (dest, GET_MODE (dest)))
    record_last_mem_set_info (last_set_insn);
}
/* Top level function to create an expression or assignment hash table.

   Expression entries are placed in the hash table if
   - they are of the form (set (pseudo-reg) src),
   - src is something we want to perform GCSE on,
   - none of the operands are subsequently modified in the block

   Assignment entries are placed in the hash table if
   - they are of the form (set (pseudo-reg) src),
   - src is something we want to perform const/copy propagation on,
   - none of the operands or target are subsequently modified in the block

   Currently src must be a pseudo-reg or a const_int.

   TABLE is the table computed.  */
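/* Illustrative example, not part of the original source: given

     (set (reg 100) (plus:SI (reg 101) (const_int 4)))
     (set (reg 102) (reg 100))
     (set (reg 103) (const_int 8))

   the first insn can yield an *expression* entry for
   (plus:SI (reg 101) (const_int 4)), while the second and third can
   yield *assignment* entries (a copy and a constant) usable by
   const/copy propagation, provided the operands involved are not
   modified later in the block.  */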
static void
compute_hash_table_work (struct hash_table *table)
{
  unsigned int i;

  /* While we compute the hash table we also compute a bit array of which
     registers are set in which blocks.
     ??? This isn't needed during const/copy propagation, but it's cheap to
     compute.  Later.  */
  sbitmap_vector_zero (reg_set_in_block, last_basic_block);

  /* re-Cache any INSN_LIST nodes we have allocated.  */
  clear_modify_mem_tables ();
  /* Some working arrays used to track first and last set in each block.  */
  reg_avail_info = gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));

  for (i = 0; i < max_gcse_regno; ++i)
    reg_avail_info[i].last_bb = NULL;

  FOR_EACH_BB (current_bb)
    {
      rtx insn;
      unsigned int regno;
      int in_libcall_block;

      /* First pass over the instructions records information used to
	 determine when registers and memory are first and last set.
	 ??? hard-reg reg_set_in_block computation
	 could be moved to compute_sets since they currently don't change.  */

      FOR_BB_INSNS (current_bb, insn)
	{
	  if (! INSN_P (insn))
	    continue;

	  if (CALL_P (insn))
	    {
	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
		if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
		  record_last_reg_set_info (insn, regno);

	      mark_call (insn);
	    }

	  note_stores (PATTERN (insn), record_last_set_info, insn);
	}

      /* Insert implicit sets in the hash table.  */
      if (table->set_p
	  && implicit_sets[current_bb->index] != NULL_RTX)
	hash_scan_set (implicit_sets[current_bb->index],
		       BB_HEAD (current_bb), table);

      /* The next pass builds the hash table.  */
      in_libcall_block = 0;
      FOR_BB_INSNS (current_bb, insn)
	if (INSN_P (insn))
	  {
	    if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
	      in_libcall_block = 1;
	    else if (table->set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
	      in_libcall_block = 0;
	    hash_scan_insn (insn, table, in_libcall_block);
	    if (!table->set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
	      in_libcall_block = 0;
	  }
    }

  free (reg_avail_info);
  reg_avail_info = NULL;
}
/* Allocate space for the set/expr hash TABLE.
   N_INSNS is the number of instructions in the function.
   It is used to determine the number of buckets to use.
   SET_P determines whether set or expression table will
   be created.  */

static void
alloc_hash_table (int n_insns, struct hash_table *table, int set_p)
{
  int n;

  table->size = n_insns / 4;
  if (table->size < 11)
    table->size = 11;

  /* Attempt to maintain efficient use of hash table.
     Making it an odd number is simplest for now.
     ??? Later take some measurements.  */
  table->size |= 1;
  n = table->size * sizeof (struct expr *);
  table->table = gmalloc (n);
  table->set_p = set_p;
}

/* Free things allocated by alloc_hash_table.  */

static void
free_hash_table (struct hash_table *table)
{
  free (table->table);
}

/* Compute the hash TABLE for doing copy/const propagation or
   expression hash table.  */

static void
compute_hash_table (struct hash_table *table)
{
  /* Initialize count of number of entries in hash table.  */
  table->n_elems = 0;
  memset (table->table, 0, table->size * sizeof (struct expr *));

  compute_hash_table_work (table);
}
/* Expression tracking support.  */

/* Lookup REGNO in the set TABLE.  The result is a pointer to the
   table entry, or NULL if not found.  */

static struct expr *
lookup_set (unsigned int regno, struct hash_table *table)
{
  unsigned int hash = hash_set (regno, table->size);
  struct expr *expr;

  expr = table->table[hash];

  while (expr && REGNO (SET_DEST (expr->expr)) != regno)
    expr = expr->next_same_hash;

  return expr;
}

/* Return the next entry for REGNO in list EXPR.  */

static struct expr *
next_set (unsigned int regno, struct expr *expr)
{
  do
    expr = expr->next_same_hash;
  while (expr && REGNO (SET_DEST (expr->expr)) != regno);

  return expr;
}
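/* Illustrative usage, not part of the original source: callers walk all
   recorded sets of a register roughly like

     struct expr *set;
     for (set = lookup_set (regno, &set_hash_table);
	  set != NULL;
	  set = next_set (regno, set))
       ... examine SET_SRC (set->expr) ...

   which is the iteration pattern used below by find_avail_set and
   find_bypass_set.  */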
/* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
   types may be mixed.  */

static void
free_insn_expr_list_list (rtx *listp)
{
  rtx list, next;

  for (list = *listp; list ; list = next)
    {
      next = XEXP (list, 1);
      if (GET_CODE (list) == EXPR_LIST)
	free_EXPR_LIST_node (list);
      else
	free_INSN_LIST_node (list);
    }

  *listp = NULL;
}

/* Clear canon_modify_mem_list and modify_mem_list tables.  */

static void
clear_modify_mem_tables (void)
{
  unsigned i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
    {
      free_INSN_LIST_list (modify_mem_list + i);
      free_insn_expr_list_list (canon_modify_mem_list + i);
    }
  bitmap_clear (modify_mem_list_set);
  bitmap_clear (blocks_with_calls);
}

/* Release memory used by modify_mem_list_set.  */

static void
free_modify_mem_tables (void)
{
  clear_modify_mem_tables ();
  free (modify_mem_list);
  free (canon_modify_mem_list);
  modify_mem_list = 0;
  canon_modify_mem_list = 0;
}

/* Reset tables used to keep track of what's still available [since the
   start of the block].  */

static void
reset_opr_set_tables (void)
{
  /* Maintain a bitmap of which regs have been set since beginning of
     the block.  */
  CLEAR_REG_SET (reg_set_bitmap);

  /* Also keep a record of the last instruction to modify memory.
     For now this is very trivial, we only record whether any memory
     location has been modified.  */
  clear_modify_mem_tables ();
}
/* Return nonzero if the operands of X are not set before INSN in
   INSN's basic block.  */

static int
oprs_not_set_p (rtx x, rtx insn)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case PC:
    case CC0:
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    case MEM:
      if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
				  INSN_CUID (insn), x, 0))
	return 0;
      else
	return oprs_not_set_p (XEXP (x, 0), insn);

    case REG:
      return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x));

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    return oprs_not_set_p (XEXP (x, i), insn);

	  if (! oprs_not_set_p (XEXP (x, i), insn))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
	    return 0;
    }

  return 1;
}
/* Mark things set by a CALL.  */

static void
mark_call (rtx insn)
{
  if (! CONST_OR_PURE_CALL_P (insn))
    record_last_mem_set_info (insn);
}

/* Mark things set by a SET.  */

static void
mark_set (rtx pat, rtx insn)
{
  rtx dest = SET_DEST (pat);

  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  if (REG_P (dest))
    SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest));
  else if (MEM_P (dest))
    record_last_mem_set_info (insn);

  if (GET_CODE (SET_SRC (pat)) == CALL)
    mark_call (insn);
}

/* Record things set by a CLOBBER.  */

static void
mark_clobber (rtx pat, rtx insn)
{
  rtx clob = XEXP (pat, 0);

  while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
    clob = XEXP (clob, 0);

  if (REG_P (clob))
    SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob));
  else
    record_last_mem_set_info (insn);
}

/* Record things set by INSN.
   This data is used by oprs_not_set_p.  */

static void
mark_oprs_set (rtx insn)
{
  rtx pat = PATTERN (insn);
  int i;

  if (GET_CODE (pat) == SET)
    mark_set (pat, insn);
  else if (GET_CODE (pat) == PARALLEL)
    for (i = 0; i < XVECLEN (pat, 0); i++)
      {
	rtx x = XVECEXP (pat, 0, i);

	if (GET_CODE (x) == SET)
	  mark_set (x, insn);
	else if (GET_CODE (x) == CLOBBER)
	  mark_clobber (x, insn);
	else if (GET_CODE (x) == CALL)
	  mark_call (insn);
      }

  else if (GET_CODE (pat) == CLOBBER)
    mark_clobber (pat, insn);
  else if (GET_CODE (pat) == CALL)
    mark_call (insn);
}
/* Compute copy/constant propagation working variables.  */

/* Local properties of assignments.  */
static sbitmap *cprop_pavloc;
static sbitmap *cprop_absaltered;

/* Global properties of assignments (computed from the local properties).  */
static sbitmap *cprop_avin;
static sbitmap *cprop_avout;

/* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
   basic blocks.  N_SETS is the number of sets.  */

static void
alloc_cprop_mem (int n_blocks, int n_sets)
{
  cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
  cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);

  cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
  cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
}

/* Free vars used by copy/const propagation.  */

static void
free_cprop_mem (void)
{
  sbitmap_vector_free (cprop_pavloc);
  sbitmap_vector_free (cprop_absaltered);
  sbitmap_vector_free (cprop_avin);
  sbitmap_vector_free (cprop_avout);
}
2415 /* For each block, compute whether X is transparent. X is either an
2416 expression or an assignment [though we don't care which, for this context
2417 an assignment is treated as an expression]. For each block where an
2418 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
2422 compute_transp (rtx x
, int indx
, sbitmap
*bmap
, int set_p
)
2430 /* repeat is used to turn tail-recursion into iteration since GCC
2431 can't do it when there's no return value. */
2437 code
= GET_CODE (x
);
2443 if (REGNO (x
) < FIRST_PSEUDO_REGISTER
)
2446 if (TEST_BIT (reg_set_in_block
[bb
->index
], REGNO (x
)))
2447 SET_BIT (bmap
[bb
->index
], indx
);
2451 for (r
= reg_set_table
[REGNO (x
)]; r
!= NULL
; r
= r
->next
)
2452 SET_BIT (bmap
[r
->bb_index
], indx
);
2457 if (REGNO (x
) < FIRST_PSEUDO_REGISTER
)
2460 if (TEST_BIT (reg_set_in_block
[bb
->index
], REGNO (x
)))
2461 RESET_BIT (bmap
[bb
->index
], indx
);
2465 for (r
= reg_set_table
[REGNO (x
)]; r
!= NULL
; r
= r
->next
)
2466 RESET_BIT (bmap
[r
->bb_index
], indx
);
2473 if (! MEM_READONLY_P (x
))
2478 /* First handle all the blocks with calls. We don't need to
2479 do any list walking for them. */
2480 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls
, 0, bb_index
, bi
)
2483 SET_BIT (bmap
[bb_index
], indx
);
2485 RESET_BIT (bmap
[bb_index
], indx
);
2488 /* Now iterate over the blocks which have memory modifications
2489 but which do not have any calls. */
2490 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set
,
2494 rtx list_entry
= canon_modify_mem_list
[bb_index
];
2498 rtx dest
, dest_addr
;
2500 /* LIST_ENTRY must be an INSN of some kind that sets memory.
2501 Examine each hunk of memory that is modified. */
2503 dest
= XEXP (list_entry
, 0);
2504 list_entry
= XEXP (list_entry
, 1);
2505 dest_addr
= XEXP (list_entry
, 0);
2507 if (canon_true_dependence (dest
, GET_MODE (dest
), dest_addr
,
2508 x
, rtx_addr_varies_p
))
2511 SET_BIT (bmap
[bb_index
], indx
);
2513 RESET_BIT (bmap
[bb_index
], indx
);
2516 list_entry
= XEXP (list_entry
, 1);
2540 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
2544 /* If we are about to do the last recursive call
2545 needed at this level, change it into iteration.
2546 This function is called enough to be worth it. */
2553 compute_transp (XEXP (x
, i
), indx
, bmap
, set_p
);
2555 else if (fmt
[i
] == 'E')
2556 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
2557 compute_transp (XVECEXP (x
, i
, j
), indx
, bmap
, set_p
);
/* Top level routine to do the dataflow analysis needed by copy/const
   propagation.  */

static void
compute_cprop_data (void)
{
  compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, &set_hash_table);
  compute_available (cprop_pavloc, cprop_absaltered,
		     cprop_avout, cprop_avin);
}
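/* Sketch of the dataflow just computed (not part of the original source;
   it restates what compute_available does with these vectors):

     AVOUT(b) = PAVLOC(b) | (AVIN(b) & ~ABSALTERED(b))
     AVIN(b)  = intersection of AVOUT(p) over all predecessors p,
		with AVIN of the entry block empty

   so an assignment is available at the start of a block only when it is
   available at the end of every predecessor.  */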
/* Copy/constant propagation.  */

/* Maximum number of register uses in an insn that we handle.  */
#define MAX_USES 8

/* Table of uses found in an insn.
   Allocated statically to avoid alloc/free complexity and overhead.  */
static struct reg_use reg_use_table[MAX_USES];

/* Index into `reg_use_table' while building it.  */
static int reg_use_count;

/* Set up a list of register numbers used in INSN.  The found uses are stored
   in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
   and contains the number of uses in the table upon exit.

   ??? If a register appears multiple times we will record it multiple times.
   This doesn't hurt anything but it will slow things down.  */

static void
find_used_regs (rtx *xptr, void *data ATTRIBUTE_UNUSED)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;
  rtx x = *xptr;

  /* repeat is used to turn tail-recursion into iteration since GCC
     can't do it when there's no return value.  */
 repeat:
  if (x == 0)
    return;

  code = GET_CODE (x);
  if (REG_P (x))
    {
      if (reg_use_count == MAX_USES)
	return;

      reg_use_table[reg_use_count].reg_rtx = x;
      reg_use_count++;
    }

  /* Recursively scan the operands of this expression.  */

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, 0);
	      goto repeat;
	    }

	  find_used_regs (&XEXP (x, i), data);
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  find_used_regs (&XVECEXP (x, i, j), data);
    }
}
/* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
   Returns nonzero if successful.  */
2642 try_replace_reg (rtx from
, rtx to
, rtx insn
)
2644 rtx note
= find_reg_equal_equiv_note (insn
);
2647 rtx set
= single_set (insn
);
2649 validate_replace_src_group (from
, to
, insn
);
2650 if (num_changes_pending () && apply_change_group ())
2653 /* Try to simplify SET_SRC if we have substituted a constant. */
2654 if (success
&& set
&& CONSTANT_P (to
))
2656 src
= simplify_rtx (SET_SRC (set
));
2659 validate_change (insn
, &SET_SRC (set
), src
, 0);
2662 /* If there is already a REG_EQUAL note, update the expression in it
2663 with our replacement. */
2664 if (note
!= 0 && REG_NOTE_KIND (note
) == REG_EQUAL
)
2665 XEXP (note
, 0) = simplify_replace_rtx (XEXP (note
, 0), from
, to
);
2667 if (!success
&& set
&& reg_mentioned_p (from
, SET_SRC (set
)))
2669 /* If above failed and this is a single set, try to simplify the source of
2670 the set given our substitution. We could perhaps try this for multiple
2671 SETs, but it probably won't buy us anything. */
2672 src
= simplify_replace_rtx (SET_SRC (set
), from
, to
);
2674 if (!rtx_equal_p (src
, SET_SRC (set
))
2675 && validate_change (insn
, &SET_SRC (set
), src
, 0))
2678 /* If we've failed to do replacement, have a single SET, don't already
2679 have a note, and have no special SET, add a REG_EQUAL note to not
2680 lose information. */
2681 if (!success
&& note
== 0 && set
!= 0
2682 && GET_CODE (SET_DEST (set
)) != ZERO_EXTRACT
2683 && GET_CODE (SET_DEST (set
)) != STRICT_LOW_PART
)
2684 note
= set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
2687 /* REG_EQUAL may get simplified into register.
2688 We don't allow that. Remove that note. This code ought
2689 not to happen, because previous code ought to synthesize
2690 reg-reg move, but be on the safe side. */
2691 if (note
&& REG_NOTE_KIND (note
) == REG_EQUAL
&& REG_P (XEXP (note
, 0)))
2692 remove_note (insn
, note
);
/* Find a set of REGNOs that are available on entry to INSN's block.
   Returns NULL if no such set is found.  */
2700 static struct expr
*
2701 find_avail_set (int regno
, rtx insn
)
2703 /* SET1 contains the last set found that can be returned to the caller for
2704 use in a substitution. */
2705 struct expr
*set1
= 0;
2707 /* Loops are not possible here. To get a loop we would need two sets
2708 available at the start of the block containing INSN. i.e. we would
2709 need two sets like this available at the start of the block:
2711 (set (reg X) (reg Y))
2712 (set (reg Y) (reg X))
2714 This can not happen since the set of (reg Y) would have killed the
2715 set of (reg X) making it unavailable at the start of this block. */
2719 struct expr
*set
= lookup_set (regno
, &set_hash_table
);
2721 /* Find a set that is available at the start of the block
2722 which contains INSN. */
2725 if (TEST_BIT (cprop_avin
[BLOCK_NUM (insn
)], set
->bitmap_index
))
2727 set
= next_set (regno
, set
);
2730 /* If no available set was found we've reached the end of the
2731 (possibly empty) copy chain. */
2735 gcc_assert (GET_CODE (set
->expr
) == SET
);
2737 src
= SET_SRC (set
->expr
);
2739 /* We know the set is available.
2740 Now check that SRC is ANTLOC (i.e. none of the source operands
2741 have changed since the start of the block).
2743 If the source operand changed, we may still use it for the next
2744 iteration of this loop, but we may not use it for substitutions. */
2746 if (gcse_constant_p (src
) || oprs_not_set_p (src
, insn
))
2749 /* If the source of the set is anything except a register, then
2750 we have reached the end of the copy chain. */
2754 /* Follow the copy chain, i.e. start another iteration of the loop
2755 and see if we have an available copy into SRC. */
2756 regno
= REGNO (src
);
2759 /* SET1 holds the last set that was available and anticipatable at
2764 /* Subroutine of cprop_insn that tries to propagate constants into
2765 JUMP_INSNS. JUMP must be a conditional jump. If SETCC is non-NULL
2766 it is the instruction that immediately precedes JUMP, and must be a
2767 single SET of a register. FROM is what we will try to replace,
2768 SRC is the constant we will try to substitute for it. Returns nonzero
2769 if a change was made. */
2772 cprop_jump (basic_block bb
, rtx setcc
, rtx jump
, rtx from
, rtx src
)
2774 rtx
new, set_src
, note_src
;
2775 rtx set
= pc_set (jump
);
2776 rtx note
= find_reg_equal_equiv_note (jump
);
2780 note_src
= XEXP (note
, 0);
2781 if (GET_CODE (note_src
) == EXPR_LIST
)
2782 note_src
= NULL_RTX
;
2784 else note_src
= NULL_RTX
;
2786 /* Prefer REG_EQUAL notes except those containing EXPR_LISTs. */
2787 set_src
= note_src
? note_src
: SET_SRC (set
);
2789 /* First substitute the SETCC condition into the JUMP instruction,
2790 then substitute that given values into this expanded JUMP. */
2791 if (setcc
!= NULL_RTX
2792 && !modified_between_p (from
, setcc
, jump
)
2793 && !modified_between_p (src
, setcc
, jump
))
2796 rtx setcc_set
= single_set (setcc
);
2797 rtx setcc_note
= find_reg_equal_equiv_note (setcc
);
2798 setcc_src
= (setcc_note
&& GET_CODE (XEXP (setcc_note
, 0)) != EXPR_LIST
)
2799 ? XEXP (setcc_note
, 0) : SET_SRC (setcc_set
);
2800 set_src
= simplify_replace_rtx (set_src
, SET_DEST (setcc_set
),
2806 new = simplify_replace_rtx (set_src
, from
, src
);
2808 /* If no simplification can be made, then try the next register. */
2809 if (rtx_equal_p (new, SET_SRC (set
)))
2812 /* If this is now a no-op delete it, otherwise this must be a valid insn. */
2817 /* Ensure the value computed inside the jump insn to be equivalent
2818 to one computed by setcc. */
2819 if (setcc
&& modified_in_p (new, setcc
))
2821 if (! validate_change (jump
, &SET_SRC (set
), new, 0))
2823 /* When (some) constants are not valid in a comparison, and there
2824 are two registers to be replaced by constants before the entire
2825 comparison can be folded into a constant, we need to keep
2826 intermediate information in REG_EQUAL notes. For targets with
2827 separate compare insns, such notes are added by try_replace_reg.
2828 When we have a combined compare-and-branch instruction, however,
2829 we need to attach a note to the branch itself to make this
2830 optimization work. */
2832 if (!rtx_equal_p (new, note_src
))
2833 set_unique_reg_note (jump
, REG_EQUAL
, copy_rtx (new));
2837 /* Remove REG_EQUAL note after simplification. */
2839 remove_note (jump
, note
);
2841 /* If this has turned into an unconditional jump,
2842 then put a barrier after it so that the unreachable
2843 code will be deleted. */
2844 if (GET_CODE (SET_SRC (set
)) == LABEL_REF
)
2845 emit_barrier_after (jump
);
2849 /* Delete the cc0 setter. */
2850 if (setcc
!= NULL
&& CC0_P (SET_DEST (single_set (setcc
))))
2851 delete_insn (setcc
);
2854 run_jump_opt_after_gcse
= 1;
2856 global_const_prop_count
++;
2857 if (dump_file
!= NULL
)
2860 "GLOBAL CONST-PROP: Replacing reg %d in jump_insn %d with constant ",
2861 REGNO (from
), INSN_UID (jump
));
2862 print_rtl (dump_file
, src
);
2863 fprintf (dump_file
, "\n");
2865 purge_dead_edges (bb
);
2871 constprop_register (rtx insn
, rtx from
, rtx to
, bool alter_jumps
)
2875 /* Check for reg or cc0 setting instructions followed by
2876 conditional branch instructions first. */
2878 && (sset
= single_set (insn
)) != NULL
2880 && any_condjump_p (NEXT_INSN (insn
)) && onlyjump_p (NEXT_INSN (insn
)))
2882 rtx dest
= SET_DEST (sset
);
2883 if ((REG_P (dest
) || CC0_P (dest
))
2884 && cprop_jump (BLOCK_FOR_INSN (insn
), insn
, NEXT_INSN (insn
), from
, to
))
2888 /* Handle normal insns next. */
2889 if (NONJUMP_INSN_P (insn
)
2890 && try_replace_reg (from
, to
, insn
))
2893 /* Try to propagate a CONST_INT into a conditional jump.
2894 We're pretty specific about what we will handle in this
2895 code, we can extend this as necessary over time.
2897 Right now the insn in question must look like
2898 (set (pc) (if_then_else ...)) */
2899 else if (alter_jumps
&& any_condjump_p (insn
) && onlyjump_p (insn
))
2900 return cprop_jump (BLOCK_FOR_INSN (insn
), NULL
, insn
, from
, to
);
2904 /* Perform constant and copy propagation on INSN.
2905 The result is nonzero if a change was made. */
2908 cprop_insn (rtx insn
, int alter_jumps
)
2910 struct reg_use
*reg_used
;
2918 note_uses (&PATTERN (insn
), find_used_regs
, NULL
);
2920 note
= find_reg_equal_equiv_note (insn
);
2922 /* We may win even when propagating constants into notes. */
2924 find_used_regs (&XEXP (note
, 0), NULL
);
2926 for (reg_used
= ®_use_table
[0]; reg_use_count
> 0;
2927 reg_used
++, reg_use_count
--)
2929 unsigned int regno
= REGNO (reg_used
->reg_rtx
);
2933 /* Ignore registers created by GCSE.
2934 We do this because ... */
2935 if (regno
>= max_gcse_regno
)
2938 /* If the register has already been set in this block, there's
2939 nothing we can do. */
2940 if (! oprs_not_set_p (reg_used
->reg_rtx
, insn
))
2943 /* Find an assignment that sets reg_used and is available
2944 at the start of the block. */
2945 set
= find_avail_set (regno
, insn
);
2950 /* ??? We might be able to handle PARALLELs. Later. */
2951 gcc_assert (GET_CODE (pat
) == SET
);
2953 src
= SET_SRC (pat
);
2955 /* Constant propagation. */
2956 if (gcse_constant_p (src
))
2958 if (constprop_register (insn
, reg_used
->reg_rtx
, src
, alter_jumps
))
2961 global_const_prop_count
++;
2962 if (dump_file
!= NULL
)
2964 fprintf (dump_file
, "GLOBAL CONST-PROP: Replacing reg %d in ", regno
);
2965 fprintf (dump_file
, "insn %d with constant ", INSN_UID (insn
));
2966 print_rtl (dump_file
, src
);
2967 fprintf (dump_file
, "\n");
2969 if (INSN_DELETED_P (insn
))
2973 else if (REG_P (src
)
2974 && REGNO (src
) >= FIRST_PSEUDO_REGISTER
2975 && REGNO (src
) != regno
)
2977 if (try_replace_reg (reg_used
->reg_rtx
, src
, insn
))
2980 global_copy_prop_count
++;
2981 if (dump_file
!= NULL
)
2983 fprintf (dump_file
, "GLOBAL COPY-PROP: Replacing reg %d in insn %d",
2984 regno
, INSN_UID (insn
));
2985 fprintf (dump_file
, " with reg %d\n", REGNO (src
));
2988 /* The original insn setting reg_used may or may not now be
2989 deletable. We leave the deletion to flow. */
2990 /* FIXME: If it turns out that the insn isn't deletable,
2991 then we may have unnecessarily extended register lifetimes
2992 and made things worse. */
3000 /* Like find_used_regs, but avoid recording uses that appear in
3001 input-output contexts such as zero_extract or pre_dec. This
3002 restricts the cases we consider to those for which local cprop
3003 can legitimately make replacements. */
3006 local_cprop_find_used_regs (rtx
*xptr
, void *data
)
3013 switch (GET_CODE (x
))
3017 case STRICT_LOW_PART
:
3026 /* Can only legitimately appear this early in the context of
3027 stack pushes for function arguments, but handle all of the
3028 codes nonetheless. */
3032 /* Setting a subreg of a register larger than word_mode leaves
3033 the non-written words unchanged. */
3034 if (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x
))) > BITS_PER_WORD
)
3042 find_used_regs (xptr
, data
);
3045 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
3046 their REG_EQUAL notes need updating. */
3049 do_local_cprop (rtx x
, rtx insn
, bool alter_jumps
, rtx
*libcall_sp
)
3051 rtx newreg
= NULL
, newcnst
= NULL
;
3053 /* Rule out USE instructions and ASM statements as we don't want to
3054 change the hard registers mentioned. */
3056 && (REGNO (x
) >= FIRST_PSEUDO_REGISTER
3057 || (GET_CODE (PATTERN (insn
)) != USE
3058 && asm_noperands (PATTERN (insn
)) < 0)))
3060 cselib_val
*val
= cselib_lookup (x
, GET_MODE (x
), 0);
3061 struct elt_loc_list
*l
;
3065 for (l
= val
->locs
; l
; l
= l
->next
)
3067 rtx this_rtx
= l
->loc
;
3070 /* Don't CSE non-constant values out of libcall blocks. */
3071 if (l
->in_libcall
&& ! CONSTANT_P (this_rtx
))
3074 if (gcse_constant_p (this_rtx
))
3076 if (REG_P (this_rtx
) && REGNO (this_rtx
) >= FIRST_PSEUDO_REGISTER
3077 /* Don't copy propagate if it has attached REG_EQUIV note.
3078 At this point this only function parameters should have
3079 REG_EQUIV notes and if the argument slot is used somewhere
3080 explicitly, it means address of parameter has been taken,
3081 so we should not extend the lifetime of the pseudo. */
3082 && (!(note
= find_reg_note (l
->setting_insn
, REG_EQUIV
, NULL_RTX
))
3083 || ! MEM_P (XEXP (note
, 0))))
3086 if (newcnst
&& constprop_register (insn
, x
, newcnst
, alter_jumps
))
	  /* If we find a case where we can't fix the retval REG_EQUAL notes
	     to match the new register, we either have to abandon this
	     replacement or fix delete_trivially_dead_insns to preserve the
	     setting insn, or make it delete the REG_EQUAL note, and fix up
	     all passes that require the REG_EQUAL note there.  */
3095 adjusted
= adjust_libcall_notes (x
, newcnst
, insn
, libcall_sp
);
3096 gcc_assert (adjusted
);
3098 if (dump_file
!= NULL
)
3100 fprintf (dump_file
, "LOCAL CONST-PROP: Replacing reg %d in ",
3102 fprintf (dump_file
, "insn %d with constant ",
3104 print_rtl (dump_file
, newcnst
);
3105 fprintf (dump_file
, "\n");
3107 local_const_prop_count
++;
3110 else if (newreg
&& newreg
!= x
&& try_replace_reg (x
, newreg
, insn
))
3112 adjust_libcall_notes (x
, newreg
, insn
, libcall_sp
);
3113 if (dump_file
!= NULL
)
3116 "LOCAL COPY-PROP: Replacing reg %d in insn %d",
3117 REGNO (x
), INSN_UID (insn
));
3118 fprintf (dump_file
, " with reg %d\n", REGNO (newreg
));
3120 local_copy_prop_count
++;
3127 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
3128 their REG_EQUAL notes need updating to reflect that OLDREG has been
3129 replaced with NEWVAL in INSN. Return true if all substitutions could
3132 adjust_libcall_notes (rtx oldreg
, rtx newval
, rtx insn
, rtx
*libcall_sp
)
3136 while ((end
= *libcall_sp
++))
3138 rtx note
= find_reg_equal_equiv_note (end
);
3145 if (reg_set_between_p (newval
, PREV_INSN (insn
), end
))
3149 note
= find_reg_equal_equiv_note (end
);
3152 if (reg_mentioned_p (newval
, XEXP (note
, 0)))
3155 while ((end
= *libcall_sp
++));
3159 XEXP (note
, 0) = simplify_replace_rtx (XEXP (note
, 0), oldreg
, newval
);
3165 #define MAX_NESTED_LIBCALLS 9
3167 /* Do local const/copy propagation (i.e. within each basic block).
3168 If ALTER_JUMPS is true, allow propagating into jump insns, which
3169 could modify the CFG. */
3172 local_cprop_pass (bool alter_jumps
)
3176 struct reg_use
*reg_used
;
3177 rtx libcall_stack
[MAX_NESTED_LIBCALLS
+ 1], *libcall_sp
;
3178 bool changed
= false;
3180 cselib_init (false);
3181 libcall_sp
= &libcall_stack
[MAX_NESTED_LIBCALLS
];
3185 FOR_BB_INSNS (bb
, insn
)
3189 rtx note
= find_reg_note (insn
, REG_LIBCALL
, NULL_RTX
);
3193 gcc_assert (libcall_sp
!= libcall_stack
);
3194 *--libcall_sp
= XEXP (note
, 0);
3196 note
= find_reg_note (insn
, REG_RETVAL
, NULL_RTX
);
3199 note
= find_reg_equal_equiv_note (insn
);
3203 note_uses (&PATTERN (insn
), local_cprop_find_used_regs
,
3206 local_cprop_find_used_regs (&XEXP (note
, 0), NULL
);
3208 for (reg_used
= ®_use_table
[0]; reg_use_count
> 0;
3209 reg_used
++, reg_use_count
--)
3210 if (do_local_cprop (reg_used
->reg_rtx
, insn
, alter_jumps
,
3216 if (INSN_DELETED_P (insn
))
3219 while (reg_use_count
);
3221 cselib_process_insn (insn
);
3224 /* Forget everything at the end of a basic block. Make sure we are
3225 not inside a libcall, they should never cross basic blocks. */
3226 cselib_clear_table ();
3227 gcc_assert (libcall_sp
== &libcall_stack
[MAX_NESTED_LIBCALLS
]);
3232 /* Global analysis may get into infinite loops for unreachable blocks. */
3233 if (changed
&& alter_jumps
)
3235 delete_unreachable_blocks ();
3236 free_reg_set_mem ();
3237 alloc_reg_set_mem (max_reg_num ());
3242 /* Forward propagate copies. This includes copies and constants. Return
3243 nonzero if a change was made. */
3246 cprop (int alter_jumps
)
3252 /* Note we start at block 1. */
3253 if (ENTRY_BLOCK_PTR
->next_bb
== EXIT_BLOCK_PTR
)
3255 if (dump_file
!= NULL
)
3256 fprintf (dump_file
, "\n");
3261 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
->next_bb
->next_bb
, EXIT_BLOCK_PTR
, next_bb
)
3263 /* Reset tables used to keep track of what's still valid [since the
3264 start of the block]. */
3265 reset_opr_set_tables ();
3267 FOR_BB_INSNS (bb
, insn
)
3270 changed
|= cprop_insn (insn
, alter_jumps
);
3272 /* Keep track of everything modified by this insn. */
3273 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
3274 call mark_oprs_set if we turned the insn into a NOTE. */
3275 if (! NOTE_P (insn
))
3276 mark_oprs_set (insn
);
3280 if (dump_file
!= NULL
)
3281 fprintf (dump_file
, "\n");
3286 /* Similar to get_condition, only the resulting condition must be
3287 valid at JUMP, instead of at EARLIEST.
3289 This differs from noce_get_condition in ifcvt.c in that we prefer not to
3290 settle for the condition variable in the jump instruction being integral.
3291 We prefer to be able to record the value of a user variable, rather than
3292 the value of a temporary used in a condition. This could be solved by
3293 recording the value of *every* register scanned by canonicalize_condition,
3294 but this would require some code reorganization. */
3297 fis_get_condition (rtx jump
)
3299 return get_condition (jump
, NULL
, false, true);
3302 /* Check the comparison COND to see if we can safely form an implicit set from
3303 it. COND is either an EQ or NE comparison. */
3306 implicit_set_cond_p (rtx cond
)
3308 enum machine_mode mode
= GET_MODE (XEXP (cond
, 0));
3309 rtx cst
= XEXP (cond
, 1);
3311 /* We can't perform this optimization if either operand might be or might
3312 contain a signed zero. */
3313 if (HONOR_SIGNED_ZEROS (mode
))
3315 /* It is sufficient to check if CST is or contains a zero. We must
3316 handle float, complex, and vector. If any subpart is a zero, then
3317 the optimization can't be performed. */
3318 /* ??? The complex and vector checks are not implemented yet. We just
3319 always return zero for them. */
3320 if (GET_CODE (cst
) == CONST_DOUBLE
)
3323 REAL_VALUE_FROM_CONST_DOUBLE (d
, cst
);
3324 if (REAL_VALUES_EQUAL (d
, dconst0
))
3331 return gcse_constant_p (cst
);
3334 /* Find the implicit sets of a function. An "implicit set" is a constraint
3335 on the value of a variable, implied by a conditional jump. For example,
3336 following "if (x == 2)", the then branch may be optimized as though the
3337 conditional performed an "explicit set", in this example, "x = 2". This
3338 function records the set patterns that are implicit at the start of each
3342 find_implicit_sets (void)
3344 basic_block bb
, dest
;
3350 /* Check for more than one successor. */
3351 if (EDGE_COUNT (bb
->succs
) > 1)
3353 cond
= fis_get_condition (BB_END (bb
));
3356 && (GET_CODE (cond
) == EQ
|| GET_CODE (cond
) == NE
)
3357 && REG_P (XEXP (cond
, 0))
3358 && REGNO (XEXP (cond
, 0)) >= FIRST_PSEUDO_REGISTER
3359 && implicit_set_cond_p (cond
))
3361 dest
= GET_CODE (cond
) == EQ
? BRANCH_EDGE (bb
)->dest
3362 : FALLTHRU_EDGE (bb
)->dest
;
3364 if (dest
&& single_pred_p (dest
)
3365 && dest
!= EXIT_BLOCK_PTR
)
3367 new = gen_rtx_SET (VOIDmode
, XEXP (cond
, 0),
3369 implicit_sets
[dest
->index
] = new;
3372 fprintf(dump_file
, "Implicit set of reg %d in ",
3373 REGNO (XEXP (cond
, 0)));
3374 fprintf(dump_file
, "basic block %d\n", dest
->index
);
3382 fprintf (dump_file
, "Found %d implicit sets\n", count
);
3385 /* Perform one copy/constant propagation pass.
3386 PASS is the pass count. If CPROP_JUMPS is true, perform constant
3387 propagation into conditional jumps. If BYPASS_JUMPS is true,
3388 perform conditional jump bypassing optimizations. */
3391 one_cprop_pass (int pass
, bool cprop_jumps
, bool bypass_jumps
)
3395 global_const_prop_count
= local_const_prop_count
= 0;
3396 global_copy_prop_count
= local_copy_prop_count
= 0;
3398 local_cprop_pass (cprop_jumps
);
3400 /* Determine implicit sets. */
3401 implicit_sets
= XCNEWVEC (rtx
, last_basic_block
);
3402 find_implicit_sets ();
3404 alloc_hash_table (max_cuid
, &set_hash_table
, 1);
3405 compute_hash_table (&set_hash_table
);
3407 /* Free implicit_sets before peak usage. */
3408 free (implicit_sets
);
3409 implicit_sets
= NULL
;
3412 dump_hash_table (dump_file
, "SET", &set_hash_table
);
3413 if (set_hash_table
.n_elems
> 0)
3415 alloc_cprop_mem (last_basic_block
, set_hash_table
.n_elems
);
3416 compute_cprop_data ();
3417 changed
= cprop (cprop_jumps
);
3419 changed
|= bypass_conditional_jumps ();
3423 free_hash_table (&set_hash_table
);
3427 fprintf (dump_file
, "CPROP of %s, pass %d: %d bytes needed, ",
3428 current_function_name (), pass
, bytes_used
);
3429 fprintf (dump_file
, "%d local const props, %d local copy props, ",
3430 local_const_prop_count
, local_copy_prop_count
);
3431 fprintf (dump_file
, "%d global const props, %d global copy props\n\n",
3432 global_const_prop_count
, global_copy_prop_count
);
3434 /* Global analysis may get into infinite loops for unreachable blocks. */
3435 if (changed
&& cprop_jumps
)
3436 delete_unreachable_blocks ();
3441 /* Bypass conditional jumps. */
3443 /* The value of last_basic_block at the beginning of the jump_bypass
3444 pass. The use of redirect_edge_and_branch_force may introduce new
3445 basic blocks, but the data flow analysis is only valid for basic
3446 block indices less than bypass_last_basic_block. */
3448 static int bypass_last_basic_block
;
3450 /* Find a set of REGNO to a constant that is available at the end of basic
3451 block BB. Returns NULL if no such set is found. Based heavily upon
3454 static struct expr
*
3455 find_bypass_set (int regno
, int bb
)
3457 struct expr
*result
= 0;
3462 struct expr
*set
= lookup_set (regno
, &set_hash_table
);
3466 if (TEST_BIT (cprop_avout
[bb
], set
->bitmap_index
))
3468 set
= next_set (regno
, set
);
3474 gcc_assert (GET_CODE (set
->expr
) == SET
);
3476 src
= SET_SRC (set
->expr
);
3477 if (gcse_constant_p (src
))
3483 regno
= REGNO (src
);
3489 /* Subroutine of bypass_block that checks whether a pseudo is killed by
3490 any of the instructions inserted on an edge. Jump bypassing places
3491 condition code setters on CFG edges using insert_insn_on_edge. This
3492 function is required to check that our data flow analysis is still
3493 valid prior to commit_edge_insertions. */
3496 reg_killed_on_edge (rtx reg
, edge e
)
3500 for (insn
= e
->insns
.r
; insn
; insn
= NEXT_INSN (insn
))
3501 if (INSN_P (insn
) && reg_set_p (reg
, insn
))
3507 /* Subroutine of bypass_conditional_jumps that attempts to bypass the given
3508 basic block BB which has more than one predecessor. If not NULL, SETCC
3509 is the first instruction of BB, which is immediately followed by JUMP_INSN
3510 JUMP. Otherwise, SETCC is NULL, and JUMP is the first insn of BB.
3511 Returns nonzero if a change was made.
3513 During the jump bypassing pass, we may place copies of SETCC instructions
3514 on CFG edges. The following routine must be careful to pay attention to
3515 these inserted insns when performing its transformations. */
3518 bypass_block (basic_block bb
, rtx setcc
, rtx jump
)
3523 int may_be_loop_header
;
3527 insn
= (setcc
!= NULL
) ? setcc
: jump
;
3529 /* Determine set of register uses in INSN. */
3531 note_uses (&PATTERN (insn
), find_used_regs
, NULL
);
3532 note
= find_reg_equal_equiv_note (insn
);
3534 find_used_regs (&XEXP (note
, 0), NULL
);
3536 may_be_loop_header
= false;
3537 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
3538 if (e
->flags
& EDGE_DFS_BACK
)
3540 may_be_loop_header
= true;
3545 for (ei
= ei_start (bb
->preds
); (e
= ei_safe_edge (ei
)); )
3549 if (e
->flags
& EDGE_COMPLEX
)
3555 /* We can't redirect edges from new basic blocks. */
3556 if (e
->src
->index
>= bypass_last_basic_block
)
3562 /* The irreducible loops created by redirecting of edges entering the
3563 loop from outside would decrease effectiveness of some of the following
3564 optimizations, so prevent this. */
3565 if (may_be_loop_header
3566 && !(e
->flags
& EDGE_DFS_BACK
))
3572 for (i
= 0; i
< reg_use_count
; i
++)
3574 struct reg_use
*reg_used
= ®_use_table
[i
];
3575 unsigned int regno
= REGNO (reg_used
->reg_rtx
);
3576 basic_block dest
, old_dest
;
3580 if (regno
>= max_gcse_regno
)
3583 set
= find_bypass_set (regno
, e
->src
->index
);
3588 /* Check the data flow is valid after edge insertions. */
3589 if (e
->insns
.r
&& reg_killed_on_edge (reg_used
->reg_rtx
, e
))
3592 src
= SET_SRC (pc_set (jump
));
3595 src
= simplify_replace_rtx (src
,
3596 SET_DEST (PATTERN (setcc
)),
3597 SET_SRC (PATTERN (setcc
)));
3599 new = simplify_replace_rtx (src
, reg_used
->reg_rtx
,
3600 SET_SRC (set
->expr
));
3602 /* Jump bypassing may have already placed instructions on
3603 edges of the CFG. We can't bypass an outgoing edge that
3604 has instructions associated with it, as these insns won't
3605 get executed if the incoming edge is redirected. */
3609 edest
= FALLTHRU_EDGE (bb
);
3610 dest
= edest
->insns
.r
? NULL
: edest
->dest
;
3612 else if (GET_CODE (new) == LABEL_REF
)
3614 dest
= BLOCK_FOR_INSN (XEXP (new, 0));
3615 /* Don't bypass edges containing instructions. */
3616 edest
= find_edge (bb
, dest
);
3617 if (edest
&& edest
->insns
.r
)
3623 /* Avoid unification of the edge with other edges from original
3624 branch. We would end up emitting the instruction on "both"
3627 if (dest
&& setcc
&& !CC0_P (SET_DEST (PATTERN (setcc
)))
3628 && find_edge (e
->src
, dest
))
3634 && dest
!= EXIT_BLOCK_PTR
)
3636 redirect_edge_and_branch_force (e
, dest
);
3638 /* Copy the register setter to the redirected edge.
3639 Don't copy CC0 setters, as CC0 is dead after jump. */
3642 rtx pat
= PATTERN (setcc
);
3643 if (!CC0_P (SET_DEST (pat
)))
3644 insert_insn_on_edge (copy_insn (pat
), e
);
3647 if (dump_file
!= NULL
)
3649 fprintf (dump_file
, "JUMP-BYPASS: Proved reg %d "
3650 "in jump_insn %d equals constant ",
3651 regno
, INSN_UID (jump
));
3652 print_rtl (dump_file
, SET_SRC (set
->expr
));
3653 fprintf (dump_file
, "\nBypass edge from %d->%d to %d\n",
3654 e
->src
->index
, old_dest
->index
, dest
->index
);
3667 /* Find basic blocks with more than one predecessor that only contain a
3668 single conditional jump. If the result of the comparison is known at
3669 compile-time from any incoming edge, redirect that edge to the
3670 appropriate target. Returns nonzero if a change was made.
3672 This function is now mis-named, because we also handle indirect jumps. */
3675 bypass_conditional_jumps (void)
3683 /* Note we start at block 1. */
3684 if (ENTRY_BLOCK_PTR
->next_bb
== EXIT_BLOCK_PTR
)
3687 bypass_last_basic_block
= last_basic_block
;
3688 mark_dfs_back_edges ();
3691 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
->next_bb
->next_bb
,
3692 EXIT_BLOCK_PTR
, next_bb
)
3694 /* Check for more than one predecessor. */
3695 if (!single_pred_p (bb
))
3698 FOR_BB_INSNS (bb
, insn
)
3699 if (NONJUMP_INSN_P (insn
))
3703 if (GET_CODE (PATTERN (insn
)) != SET
)
3706 dest
= SET_DEST (PATTERN (insn
));
3707 if (REG_P (dest
) || CC0_P (dest
))
3712 else if (JUMP_P (insn
))
3714 if ((any_condjump_p (insn
) || computed_jump_p (insn
))
3715 && onlyjump_p (insn
))
3716 changed
|= bypass_block (bb
, setcc
, insn
);
3719 else if (INSN_P (insn
))
3724 /* If we bypassed any register setting insns, we inserted a
3725 copy on the redirected edge. These need to be committed. */
3727 commit_edge_insertions();
3732 /* Compute PRE+LCM working variables. */
3734 /* Local properties of expressions. */
3735 /* Nonzero for expressions that are transparent in the block. */
3736 static sbitmap
*transp
;
3738 /* Nonzero for expressions that are transparent at the end of the block.
3739 This is only zero for expressions killed by abnormal critical edge
3740 created by a calls. */
3741 static sbitmap
*transpout
;
3743 /* Nonzero for expressions that are computed (available) in the block. */
3744 static sbitmap
*comp
;
3746 /* Nonzero for expressions that are locally anticipatable in the block. */
3747 static sbitmap
*antloc
;
3749 /* Nonzero for expressions where this block is an optimal computation
3751 static sbitmap
*pre_optimal
;
3753 /* Nonzero for expressions which are redundant in a particular block. */
3754 static sbitmap
*pre_redundant
;
3756 /* Nonzero for expressions which should be inserted on a specific edge. */
3757 static sbitmap
*pre_insert_map
;
3759 /* Nonzero for expressions which should be deleted in a specific block. */
3760 static sbitmap
*pre_delete_map
;
3762 /* Contains the edge_list returned by pre_edge_lcm. */
3763 static struct edge_list
*edge_list
;
3765 /* Redundant insns. */
3766 static sbitmap pre_redundant_insns
;
3768 /* Allocate vars used for PRE analysis. */
3771 alloc_pre_mem (int n_blocks
, int n_exprs
)
3773 transp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
3774 comp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
3775 antloc
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
3778 pre_redundant
= NULL
;
3779 pre_insert_map
= NULL
;
3780 pre_delete_map
= NULL
;
3781 ae_kill
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
3783 /* pre_insert and pre_delete are allocated later. */
3786 /* Free vars used for PRE analysis. */
3791 sbitmap_vector_free (transp
);
3792 sbitmap_vector_free (comp
);
3794 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
3797 sbitmap_vector_free (pre_optimal
);
3799 sbitmap_vector_free (pre_redundant
);
3801 sbitmap_vector_free (pre_insert_map
);
3803 sbitmap_vector_free (pre_delete_map
);
3805 transp
= comp
= NULL
;
3806 pre_optimal
= pre_redundant
= pre_insert_map
= pre_delete_map
= NULL
;
3809 /* Top level routine to do the dataflow analysis needed by PRE. */
3812 compute_pre_data (void)
3814 sbitmap trapping_expr
;
3818 compute_local_properties (transp
, comp
, antloc
, &expr_hash_table
);
3819 sbitmap_vector_zero (ae_kill
, last_basic_block
);
3821 /* Collect expressions which might trap. */
3822 trapping_expr
= sbitmap_alloc (expr_hash_table
.n_elems
);
3823 sbitmap_zero (trapping_expr
);
3824 for (ui
= 0; ui
< expr_hash_table
.size
; ui
++)
3827 for (e
= expr_hash_table
.table
[ui
]; e
!= NULL
; e
= e
->next_same_hash
)
3828 if (may_trap_p (e
->expr
))
3829 SET_BIT (trapping_expr
, e
->bitmap_index
);
3832 /* Compute ae_kill for each basic block using:
3842 /* If the current block is the destination of an abnormal edge, we
3843 kill all trapping expressions because we won't be able to properly
3844 place the instruction on the edge. So make them neither
3845 anticipatable nor transparent. This is fairly conservative. */
3846 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
3847 if (e
->flags
& EDGE_ABNORMAL
)
3849 sbitmap_difference (antloc
[bb
->index
], antloc
[bb
->index
], trapping_expr
);
3850 sbitmap_difference (transp
[bb
->index
], transp
[bb
->index
], trapping_expr
);
3854 sbitmap_a_or_b (ae_kill
[bb
->index
], transp
[bb
->index
], comp
[bb
->index
]);
3855 sbitmap_not (ae_kill
[bb
->index
], ae_kill
[bb
->index
]);
3858 edge_list
= pre_edge_lcm (expr_hash_table
.n_elems
, transp
, comp
, antloc
,
3859 ae_kill
, &pre_insert_map
, &pre_delete_map
);
3860 sbitmap_vector_free (antloc
);
3862 sbitmap_vector_free (ae_kill
);
3864 sbitmap_free (trapping_expr
);
3869 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
3872 VISITED is a pointer to a working buffer for tracking which BB's have
3873 been visited. It is NULL for the top-level call.
3875 We treat reaching expressions that go through blocks containing the same
3876 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
3877 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3878 2 as not reaching. The intent is to improve the probability of finding
3879 only one reaching expression and to reduce register lifetimes by picking
3880 the closest such expression. */
3883 pre_expr_reaches_here_p_work (basic_block occr_bb
, struct expr
*expr
, basic_block bb
, char *visited
)
3888 FOR_EACH_EDGE (pred
, ei
, bb
->preds
)
3890 basic_block pred_bb
= pred
->src
;
3892 if (pred
->src
== ENTRY_BLOCK_PTR
3893 /* Has predecessor has already been visited? */
3894 || visited
[pred_bb
->index
])
3895 ;/* Nothing to do. */
3897 /* Does this predecessor generate this expression? */
3898 else if (TEST_BIT (comp
[pred_bb
->index
], expr
->bitmap_index
))
3900 /* Is this the occurrence we're looking for?
3901 Note that there's only one generating occurrence per block
3902 so we just need to check the block number. */
3903 if (occr_bb
== pred_bb
)
3906 visited
[pred_bb
->index
] = 1;
3908 /* Ignore this predecessor if it kills the expression. */
3909 else if (! TEST_BIT (transp
[pred_bb
->index
], expr
->bitmap_index
))
3910 visited
[pred_bb
->index
] = 1;
3912 /* Neither gen nor kill. */
3915 visited
[pred_bb
->index
] = 1;
3916 if (pre_expr_reaches_here_p_work (occr_bb
, expr
, pred_bb
, visited
))
3921 /* All paths have been checked. */
3925 /* The wrapper for pre_expr_reaches_here_work that ensures that any
3926 memory allocated for that function is returned. */
3929 pre_expr_reaches_here_p (basic_block occr_bb
, struct expr
*expr
, basic_block bb
)
3932 char *visited
= XCNEWVEC (char, last_basic_block
);
3934 rval
= pre_expr_reaches_here_p_work (occr_bb
, expr
, bb
, visited
);
3941 /* Given an expr, generate RTL which we can insert at the end of a BB,
3942 or on an edge. Set the block number of any insns generated to
3946 process_insert_insn (struct expr
*expr
)
3948 rtx reg
= expr
->reaching_reg
;
3949 rtx exp
= copy_rtx (expr
->expr
);
3954 /* If the expression is something that's an operand, like a constant,
3955 just copy it to a register. */
3956 if (general_operand (exp
, GET_MODE (reg
)))
3957 emit_move_insn (reg
, exp
);
3959 /* Otherwise, make a new insn to compute this expression and make sure the
3960 insn will be recognized (this also adds any needed CLOBBERs). Copy the
3961 expression to make sure we don't have any sharing issues. */
3964 rtx insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, exp
));
3966 if (insn_invalid_p (insn
))
3977 /* Add EXPR to the end of basic block BB.
3979 This is used by both the PRE and code hoisting.
3981 For PRE, we want to verify that the expr is either transparent
3982 or locally anticipatable in the target block. This check makes
3983 no sense for code hoisting. */
3986 insert_insn_end_bb (struct expr
*expr
, basic_block bb
, int pre
)
3988 rtx insn
= BB_END (bb
);
3990 rtx reg
= expr
->reaching_reg
;
3991 int regno
= REGNO (reg
);
3994 pat
= process_insert_insn (expr
);
3995 gcc_assert (pat
&& INSN_P (pat
));
3998 while (NEXT_INSN (pat_end
) != NULL_RTX
)
3999 pat_end
= NEXT_INSN (pat_end
);
4001 /* If the last insn is a jump, insert EXPR in front [taking care to
4002 handle cc0, etc. properly]. Similarly we need to care trapping
4003 instructions in presence of non-call exceptions. */
4006 || (NONJUMP_INSN_P (insn
)
4007 && (!single_succ_p (bb
)
4008 || single_succ_edge (bb
)->flags
& EDGE_ABNORMAL
)))
4013 /* It should always be the case that we can put these instructions
4014 anywhere in the basic block with performing PRE optimizations.
4016 gcc_assert (!NONJUMP_INSN_P (insn
) || !pre
4017 || TEST_BIT (antloc
[bb
->index
], expr
->bitmap_index
)
4018 || TEST_BIT (transp
[bb
->index
], expr
->bitmap_index
));
4020 /* If this is a jump table, then we can't insert stuff here. Since
4021 we know the previous real insn must be the tablejump, we insert
4022 the new instruction just before the tablejump. */
4023 if (GET_CODE (PATTERN (insn
)) == ADDR_VEC
4024 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
)
4025 insn
= prev_real_insn (insn
);
4028 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4029 if cc0 isn't set. */
4030 note
= find_reg_note (insn
, REG_CC_SETTER
, NULL_RTX
);
4032 insn
= XEXP (note
, 0);
4035 rtx maybe_cc0_setter
= prev_nonnote_insn (insn
);
4036 if (maybe_cc0_setter
4037 && INSN_P (maybe_cc0_setter
)
4038 && sets_cc0_p (PATTERN (maybe_cc0_setter
)))
4039 insn
= maybe_cc0_setter
;
4042 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4043 new_insn
= emit_insn_before_noloc (pat
, insn
);
4046 /* Likewise if the last insn is a call, as will happen in the presence
4047 of exception handling. */
4048 else if (CALL_P (insn
)
4049 && (!single_succ_p (bb
)
4050 || single_succ_edge (bb
)->flags
& EDGE_ABNORMAL
))
4052 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4053 we search backward and place the instructions before the first
4054 parameter is loaded. Do this for everyone for consistency and a
4055 presumption that we'll get better code elsewhere as well.
4057 It should always be the case that we can put these instructions
4058 anywhere in the basic block with performing PRE optimizations.
4062 || TEST_BIT (antloc
[bb
->index
], expr
->bitmap_index
)
4063 || TEST_BIT (transp
[bb
->index
], expr
->bitmap_index
));
4065 /* Since different machines initialize their parameter registers
4066 in different orders, assume nothing. Collect the set of all
4067 parameter registers. */
4068 insn
= find_first_parameter_load (insn
, BB_HEAD (bb
));
4070 /* If we found all the parameter loads, then we want to insert
4071 before the first parameter load.
4073 If we did not find all the parameter loads, then we might have
4074 stopped on the head of the block, which could be a CODE_LABEL.
4075 If we inserted before the CODE_LABEL, then we would be putting
4076 the insn in the wrong basic block. In that case, put the insn
4077 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4078 while (LABEL_P (insn
)
4079 || NOTE_INSN_BASIC_BLOCK_P (insn
))
4080 insn
= NEXT_INSN (insn
);
4082 new_insn
= emit_insn_before_noloc (pat
, insn
);
4085 new_insn
= emit_insn_after_noloc (pat
, insn
);
4091 add_label_notes (PATTERN (pat
), new_insn
);
4092 note_stores (PATTERN (pat
), record_set_info
, pat
);
4096 pat
= NEXT_INSN (pat
);
4099 gcse_create_count
++;
4103 fprintf (dump_file
, "PRE/HOIST: end of bb %d, insn %d, ",
4104 bb
->index
, INSN_UID (new_insn
));
4105 fprintf (dump_file
, "copying expression %d to reg %d\n",
4106 expr
->bitmap_index
, regno
);
4110 /* Insert partially redundant expressions on edges in the CFG to make
4111 the expressions fully redundant. */
4114 pre_edge_insert (struct edge_list
*edge_list
, struct expr
**index_map
)
4116 int e
, i
, j
, num_edges
, set_size
, did_insert
= 0;
4119 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4120 if it reaches any of the deleted expressions. */
4122 set_size
= pre_insert_map
[0]->size
;
4123 num_edges
= NUM_EDGES (edge_list
);
4124 inserted
= sbitmap_vector_alloc (num_edges
, expr_hash_table
.n_elems
);
4125 sbitmap_vector_zero (inserted
, num_edges
);
4127 for (e
= 0; e
< num_edges
; e
++)
4130 basic_block bb
= INDEX_EDGE_PRED_BB (edge_list
, e
);
4132 for (i
= indx
= 0; i
< set_size
; i
++, indx
+= SBITMAP_ELT_BITS
)
4134 SBITMAP_ELT_TYPE insert
= pre_insert_map
[e
]->elms
[i
];
4136 for (j
= indx
; insert
&& j
< (int) expr_hash_table
.n_elems
; j
++, insert
>>= 1)
4137 if ((insert
& 1) != 0 && index_map
[j
]->reaching_reg
!= NULL_RTX
)
4139 struct expr
*expr
= index_map
[j
];
		/* Now look at each deleted occurrence of this expression.  */
		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
		  {
		    if (! occr->deleted_p)
		      continue;

		    /* Insert this expression on this edge if it would
		       reach the deleted occurrence in BB.  */
		    if (!TEST_BIT (inserted[e], j))
		      {
			rtx insn;
			edge eg = INDEX_EDGE (edge_list, e);

			/* We can't insert anything on an abnormal and
			   critical edge, so we insert the insn at the end of
			   the previous block.  There are several alternatives
			   detailed in Morgan's book P277 (sec 10.5) for
			   handling this situation.  This one is easiest for
			   now.  */
			if (eg->flags & EDGE_ABNORMAL)
			  insert_insn_end_bb (index_map[j], bb, 0);
			else
			  {
			    insn = process_insert_insn (index_map[j]);
			    insert_insn_on_edge (insn, eg);
			  }

			fprintf (dump_file, "PRE/HOIST: edge (%d,%d), ",
				 bb->index,
				 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
			fprintf (dump_file, "copy expression %d\n",
				 expr->bitmap_index);

			update_ld_motion_stores (expr);
			SET_BIT (inserted[e], j);
			gcse_create_count++;
		      }
		  }

  sbitmap_vector_free (inserted);
/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
   Given "old_reg <- expr" (INSN), instead of adding after it
     reaching_reg <- old_reg
   it's better to do the following:
     reaching_reg <- expr
     old_reg      <- reaching_reg
   because this way copy propagation can discover additional PRE
   opportunities.  But if this fails, we try the old way.
   When "expr" is a store, i.e.
   given "MEM <- old_reg", instead of adding after it
     reaching_reg <- old_reg
   it's better to add it before as follows:
     reaching_reg <- old_reg
     MEM          <- reaching_reg.  */
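/* An illustrative sketch (register numbers invented for illustration):
   with a concrete expression, the preferred rewrite described above is

     Before:   r5 <- r1 + r2        After:   r9 <- r1 + r2    ; reaching_reg
                                             r5 <- r9

   so a later redundant "r7 <- r1 + r2" can become "r7 <- r9", and the copy
   "r5 <- r9" may itself be propagated away by later passes.  */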
pre_insert_copy_insn (struct expr *expr, rtx insn)
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
  rtx pat = PATTERN (insn);
  rtx set, first_set, new_insn;
4219 /* This block matches the logic in hash_scan_insn. */
4220 switch (GET_CODE (pat
))
4227 /* Search through the parallel looking for the set whose
4228 source was the expression that we're interested in. */
4229 first_set
= NULL_RTX
;
4231 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
4233 rtx x
= XVECEXP (pat
, 0, i
);
4234 if (GET_CODE (x
) == SET
)
4236 /* If the source was a REG_EQUAL or REG_EQUIV note, we
4237 may not find an equivalent expression, but in this
4238 case the PARALLEL will have a single set. */
4239 if (first_set
== NULL_RTX
)
4241 if (expr_equiv_p (SET_SRC (x
), expr
->expr
))
4249 gcc_assert (first_set
);
4250 if (set
== NULL_RTX
)
4258 if (REG_P (SET_DEST (set
)))
4260 old_reg
= SET_DEST (set
);
4261 /* Check if we can modify the set destination in the original insn. */
4262 if (validate_change (insn
, &SET_DEST (set
), reg
, 0))
4264 new_insn
= gen_move_insn (old_reg
, reg
);
4265 new_insn
= emit_insn_after (new_insn
, insn
);
4267 /* Keep register set table up to date. */
4268 record_one_set (regno
, insn
);
4272 new_insn
= gen_move_insn (reg
, old_reg
);
4273 new_insn
= emit_insn_after (new_insn
, insn
);
4275 /* Keep register set table up to date. */
4276 record_one_set (regno
, new_insn
);
4279 else /* This is possible only in case of a store to memory. */
4281 old_reg
= SET_SRC (set
);
4282 new_insn
= gen_move_insn (reg
, old_reg
);
4284 /* Check if we can modify the set source in the original insn. */
4285 if (validate_change (insn
, &SET_SRC (set
), reg
, 0))
4286 new_insn
= emit_insn_before (new_insn
, insn
);
4288 new_insn
= emit_insn_after (new_insn
, insn
);
4290 /* Keep register set table up to date. */
4291 record_one_set (regno
, new_insn
);
4294 gcse_create_count
++;
4298 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4299 BLOCK_NUM (insn
), INSN_UID (new_insn
), indx
,
4300 INSN_UID (insn
), regno
);
/* Copy available expressions that reach the redundant expression
   to `reaching_reg'.  */

pre_insert_copies (void)
{
  unsigned int i, added_copy;
4314 /* For each available expression in the table, copy the result to
4315 `reaching_reg' if the expression reaches a deleted one.
4317 ??? The current algorithm is rather brute force.
4318 Need to do some profiling. */
4320 for (i
= 0; i
< expr_hash_table
.size
; i
++)
4321 for (expr
= expr_hash_table
.table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
4323 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4324 we don't want to insert a copy here because the expression may not
4325 really be redundant. So only insert an insn if the expression was
4326 deleted. This test also avoids further processing if the
4327 expression wasn't deleted anywhere. */
4328 if (expr
->reaching_reg
== NULL
)
4331 /* Set when we add a copy for that expression. */
4334 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
4336 if (! occr
->deleted_p
)
4339 for (avail
= expr
->avail_occr
; avail
!= NULL
; avail
= avail
->next
)
4341 rtx insn
= avail
->insn
;
4343 /* No need to handle this one if handled already. */
4344 if (avail
->copied_p
)
4347 /* Don't handle this one if it's a redundant one. */
4348 if (TEST_BIT (pre_redundant_insns
, INSN_CUID (insn
)))
4351 /* Or if the expression doesn't reach the deleted one. */
4352 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail
->insn
),
4354 BLOCK_FOR_INSN (occr
->insn
)))
4359 /* Copy the result of avail to reaching_reg. */
4360 pre_insert_copy_insn (expr
, insn
);
4361 avail
->copied_p
= 1;
4366 update_ld_motion_stores (expr
);
/* Emit move from SRC to DEST noting the equivalence with expression computed
   in INSN.  */

gcse_emit_move_after (rtx src, rtx dest, rtx insn)
{
  rtx set = single_set (insn), set2;
4380 /* This should never fail since we're creating a reg->reg copy
4381 we've verified to be valid. */
4383 new = emit_insn_after (gen_move_insn (dest
, src
), insn
);
4385 /* Note the equivalence for local CSE pass. */
4386 set2
= single_set (new);
4387 if (!set2
|| !rtx_equal_p (SET_DEST (set2
), dest
))
4389 if ((note
= find_reg_equal_equiv_note (insn
)))
4390 eqv
= XEXP (note
, 0);
4392 eqv
= SET_SRC (set
);
4394 set_unique_reg_note (new, REG_EQUAL
, copy_insn_1 (eqv
));
4399 /* Delete redundant computations.
4400 Deletion is done by changing the insn to copy the `reaching_reg' of
4401 the expression into the result of the SET. It is left to later passes
4402 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4404 Returns nonzero if a change is made. */
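/* An illustrative sketch (register numbers invented for illustration):
   "deletion" here rewrites the redundant computation as a copy, e.g.

     Before:   r7 <- r1 + r2        After:   r7 <- r9    ; r9 = reaching_reg

   and leaves it to later passes (cprop, cse2, flow, combine, regmove) to
   propagate or eliminate the copy.  */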
4415 for (i
= 0; i
< expr_hash_table
.size
; i
++)
4416 for (expr
= expr_hash_table
.table
[i
];
4418 expr
= expr
->next_same_hash
)
4420 int indx
= expr
->bitmap_index
;
4422 /* We only need to search antic_occr since we require
4425 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
4427 rtx insn
= occr
->insn
;
4429 basic_block bb
= BLOCK_FOR_INSN (insn
);
4431 /* We only delete insns that have a single_set. */
4432 if (TEST_BIT (pre_delete_map
[bb
->index
], indx
)
4433 && (set
= single_set (insn
)) != 0)
4435 /* Create a pseudo-reg to store the result of reaching
4436 expressions into. Get the mode for the new pseudo from
4437 the mode of the original destination pseudo. */
4438 if (expr
->reaching_reg
== NULL
)
4440 = gen_reg_rtx (GET_MODE (SET_DEST (set
)));
4442 gcse_emit_move_after (expr
->reaching_reg
, SET_DEST (set
), insn
);
4444 occr
->deleted_p
= 1;
4445 SET_BIT (pre_redundant_insns
, INSN_CUID (insn
));
4452 "PRE: redundant insn %d (expression %d) in ",
4453 INSN_UID (insn
), indx
);
4454 fprintf (dump_file
, "bb %d, reaching reg is %d\n",
4455 bb
->index
, REGNO (expr
->reaching_reg
));
/* Perform GCSE optimizations using PRE.
   This is called by one_pre_gcse_pass after all the dataflow analysis
   has been done.

   This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
   and lazy code motion from Knoop, Ruthing and Steffen as described in
   Advanced Compiler Design and Implementation.
4472 ??? A new pseudo reg is created to hold the reaching expression. The nice
4473 thing about the classical approach is that it would try to use an existing
4474 reg. If the register can't be adequately optimized [i.e. we introduce
4475 reload problems], one could add a pass here to propagate the new register
4478 ??? We don't handle single sets in PARALLELs because we're [currently] not
4479 able to copy the rest of the parallel when we insert copies to create full
4480 redundancies from partial redundancies. However, there's no reason why we
4481 can't handle PARALLELs in the cases where there are no partial
4488 int did_insert
, changed
;
4489 struct expr
**index_map
;
  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Reset bitmap used to track which insns are redundant.  */
  pre_redundant_insns = sbitmap_alloc (max_cuid);
  sbitmap_zero (pre_redundant_insns);
  /* Delete the redundant insns first so that
     - we know what register to use for the new insns and for the other
       ones with reaching expressions
     - we know which insns are redundant when we go to create copies  */

  changed = pre_delete ();

  did_insert = pre_edge_insert (edge_list, index_map);

  /* In other places with reaching expressions, copy the expression to the
     specially allocated pseudo-reg that reaches the redundant expr.  */
  pre_insert_copies ();

  commit_edge_insertions ();

  sbitmap_free (pre_redundant_insns);
/* Top level routine to perform one PRE GCSE pass.

   Return nonzero if a change was made.  */

one_pre_gcse_pass (int pass)
{
  gcse_subst_count = 0;
  gcse_create_count = 0;

  alloc_hash_table (max_cuid, &expr_hash_table, 0);
  add_noreturn_fake_exit_edges ();
  compute_ld_motion_mems ();

  compute_hash_table (&expr_hash_table);
  trim_ld_motion_mems ();

  dump_hash_table (dump_file, "Expression", &expr_hash_table);

  if (expr_hash_table.n_elems > 0)
    {
      alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
      compute_pre_data ();
      changed |= pre_gcse ();
      free_edge_list (edge_list);
    }

  remove_fake_exit_edges ();
  free_hash_table (&expr_hash_table);

  fprintf (dump_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
	   current_function_name (), pass, bytes_used);
  fprintf (dump_file, "%d substs, %d insns created\n",
	   gcse_subst_count, gcse_create_count);
4573 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4574 If notes are added to an insn which references a CODE_LABEL, the
4575 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
4576 because the following loop optimization pass requires them. */
4578 /* ??? If there was a jump optimization pass after gcse and before loop,
4579 then we would not need to do this here, because jump would add the
4580 necessary REG_LABEL notes. */
4583 add_label_notes (rtx x
, rtx insn
)
4585 enum rtx_code code
= GET_CODE (x
);
4589 if (code
== LABEL_REF
&& !LABEL_REF_NONLOCAL_P (x
))
4591 /* This code used to ignore labels that referred to dispatch tables to
4592 avoid flow generating (slightly) worse code.
4594 We no longer ignore such label references (see LABEL_REF handling in
4595 mark_jump_label for additional information). */
4597 REG_NOTES (insn
) = gen_rtx_INSN_LIST (REG_LABEL
, XEXP (x
, 0),
4599 if (LABEL_P (XEXP (x
, 0)))
4600 LABEL_NUSES (XEXP (x
, 0))++;
4604 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
4607 add_label_notes (XEXP (x
, i
), insn
);
4608 else if (fmt
[i
] == 'E')
4609 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4610 add_label_notes (XVECEXP (x
, i
, j
), insn
);
4614 /* Compute transparent outgoing information for each block.
4616 An expression is transparent to an edge unless it is killed by
4617 the edge itself. This can only happen with abnormal control flow,
4618 when the edge is traversed through a call. This happens with
4619 non-local labels and exceptions.
4621 This would not be necessary if we split the edge. While this is
4622 normally impossible for abnormal critical edges, with some effort
4623 it should be possible with exception handling, since we still have
4624 control over which handler should be invoked. But due to increased
4625 EH table sizes, this may not be worthwhile. */
4628 compute_transpout (void)
4634 sbitmap_vector_ones (transpout
, last_basic_block
);
      /* Note that flow inserted a nop at the end of basic blocks that
	 end in call instructions for reasons other than abnormal
4641 if (! CALL_P (BB_END (bb
)))
4644 for (i
= 0; i
< expr_hash_table
.size
; i
++)
4645 for (expr
= expr_hash_table
.table
[i
]; expr
; expr
= expr
->next_same_hash
)
4646 if (MEM_P (expr
->expr
))
4648 if (GET_CODE (XEXP (expr
->expr
, 0)) == SYMBOL_REF
4649 && CONSTANT_POOL_ADDRESS_P (XEXP (expr
->expr
, 0)))
4652 /* ??? Optimally, we would use interprocedural alias
4653 analysis to determine if this mem is actually killed
4655 RESET_BIT (transpout
[bb
->index
], expr
->bitmap_index
);
4660 /* Code Hoisting variables and subroutines. */
/* Very busy expressions.  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;

/* Hoistable expressions.  */
static sbitmap *hoist_exprs;
/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging; doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */
/* Allocate vars used for code hoisting analysis.  */

alloc_code_hoist_mem (int n_blocks, int n_exprs)
{
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);

  hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
  transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
/* Free vars used for code hoisting analysis.  */

free_code_hoist_mem (void)
{
  sbitmap_vector_free (antloc);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);

  sbitmap_vector_free (hoist_vbein);
  sbitmap_vector_free (hoist_vbeout);
  sbitmap_vector_free (hoist_exprs);
  sbitmap_vector_free (transpout);

  free_dominance_info (CDI_DOMINATORS);
4708 /* Compute the very busy expressions at entry/exit from each block.
4710 An expression is very busy if all paths from a given point
4711 compute the expression. */
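/* An illustrative sketch (names invented for illustration): an expression is
   very busy at a point if every path leaving that point computes it before
   any of its operands change, e.g.

     if (p)                  ; a + b is very busy just before the branch:
       x = a + b;            ; both successors compute it, so it may be
     else                    ; hoisted above the branch.
       y = a + b;
*/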
compute_code_hoist_vbeinout (void)
{
  int changed, passes;

  sbitmap_vector_zero (hoist_vbeout, last_basic_block);
  sbitmap_vector_zero (hoist_vbein, last_basic_block);

      /* We scan the blocks in the reverse order to speed up
	 the convergence.  */
      FOR_EACH_BB_REVERSE (bb)
	{
	  changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
					      antloc[bb->index],
					      hoist_vbeout[bb->index],
					      transp[bb->index]);
	  if (bb->next_bb != EXIT_BLOCK_PTR)
	    sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
					   hoist_vbein, bb->index);
	}

    fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
/* Top level routine to do the dataflow analysis needed by code hoisting.  */

compute_code_hoist_data (void)
{
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
  compute_transpout ();
  compute_code_hoist_vbeinout ();
  calculate_dominance_info (CDI_DOMINATORS);

    fprintf (dump_file, "\n");
/* Determine if the expression identified by EXPR_INDEX would
   reach BB unimpaired if it was placed at the end of EXPR_BB.

   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
4773 hoist_expr_reaches_here_p (basic_block expr_bb
, int expr_index
, basic_block bb
, char *visited
)
4777 int visited_allocated_locally
= 0;
4780 if (visited
== NULL
)
4782 visited_allocated_locally
= 1;
4783 visited
= XCNEWVEC (char, last_basic_block
);
4786 FOR_EACH_EDGE (pred
, ei
, bb
->preds
)
4788 basic_block pred_bb
= pred
->src
;
4790 if (pred
->src
== ENTRY_BLOCK_PTR
)
4792 else if (pred_bb
== expr_bb
)
4794 else if (visited
[pred_bb
->index
])
4797 /* Does this predecessor generate this expression? */
4798 else if (TEST_BIT (comp
[pred_bb
->index
], expr_index
))
4800 else if (! TEST_BIT (transp
[pred_bb
->index
], expr_index
))
4806 visited
[pred_bb
->index
] = 1;
4807 if (! hoist_expr_reaches_here_p (expr_bb
, expr_index
,
4812 if (visited_allocated_locally
)
4815 return (pred
== NULL
);
4818 /* Actually perform code hoisting. */
4823 basic_block bb
, dominated
;
4825 unsigned int domby_len
;
4827 struct expr
**index_map
;
4830 sbitmap_vector_zero (hoist_exprs
, last_basic_block
);
4832 /* Compute a mapping from expression number (`bitmap_index') to
4833 hash table entry. */
4835 index_map
= XCNEWVEC (struct expr
*, expr_hash_table
.n_elems
);
4836 for (i
= 0; i
< expr_hash_table
.size
; i
++)
4837 for (expr
= expr_hash_table
.table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
4838 index_map
[expr
->bitmap_index
] = expr
;
4840 /* Walk over each basic block looking for potentially hoistable
4841 expressions, nothing gets hoisted from the entry block. */
4845 int insn_inserted_p
;
4847 domby_len
= get_dominated_by (CDI_DOMINATORS
, bb
, &domby
);
4848 /* Examine each expression that is very busy at the exit of this
4849 block. These are the potentially hoistable expressions. */
4850 for (i
= 0; i
< hoist_vbeout
[bb
->index
]->n_bits
; i
++)
4854 if (TEST_BIT (hoist_vbeout
[bb
->index
], i
)
4855 && TEST_BIT (transpout
[bb
->index
], i
))
4857 /* We've found a potentially hoistable expression, now
4858 we look at every block BB dominates to see if it
4859 computes the expression. */
4860 for (j
= 0; j
< domby_len
; j
++)
4862 dominated
= domby
[j
];
4863 /* Ignore self dominance. */
4864 if (bb
== dominated
)
4866 /* We've found a dominated block, now see if it computes
4867 the busy expression and whether or not moving that
4868 expression to the "beginning" of that block is safe. */
4869 if (!TEST_BIT (antloc
[dominated
->index
], i
))
		  /* Note if the expression would reach the dominated block
		     unimpaired if it was placed at the end of BB.

		     Keep track of how many times this expression is hoistable
		     from a dominated block into BB.  */
4877 if (hoist_expr_reaches_here_p (bb
, i
, dominated
, NULL
))
4881 /* If we found more than one hoistable occurrence of this
4882 expression, then note it in the bitmap of expressions to
4883 hoist. It makes no sense to hoist things which are computed
4884 in only one BB, and doing so tends to pessimize register
4885 allocation. One could increase this value to try harder
4886 to avoid any possible code expansion due to register
4887 allocation issues; however experiments have shown that
4888 the vast majority of hoistable expressions are only movable
4889 from two successors, so raising this threshold is likely
4890 to nullify any benefit we get from code hoisting. */
4893 SET_BIT (hoist_exprs
[bb
->index
], i
);
4898 /* If we found nothing to hoist, then quit now. */
4905 /* Loop over all the hoistable expressions. */
4906 for (i
= 0; i
< hoist_exprs
[bb
->index
]->n_bits
; i
++)
4908 /* We want to insert the expression into BB only once, so
4909 note when we've inserted it. */
4910 insn_inserted_p
= 0;
4912 /* These tests should be the same as the tests above. */
4913 if (TEST_BIT (hoist_exprs
[bb
->index
], i
))
4915 /* We've found a potentially hoistable expression, now
4916 we look at every block BB dominates to see if it
4917 computes the expression. */
4918 for (j
= 0; j
< domby_len
; j
++)
4920 dominated
= domby
[j
];
4921 /* Ignore self dominance. */
4922 if (bb
== dominated
)
4925 /* We've found a dominated block, now see if it computes
4926 the busy expression and whether or not moving that
4927 expression to the "beginning" of that block is safe. */
4928 if (!TEST_BIT (antloc
[dominated
->index
], i
))
4931 /* The expression is computed in the dominated block and
4932 it would be safe to compute it at the start of the
4933 dominated block. Now we have to determine if the
4934 expression would reach the dominated block if it was
4935 placed at the end of BB. */
4936 if (hoist_expr_reaches_here_p (bb
, i
, dominated
, NULL
))
4938 struct expr
*expr
= index_map
[i
];
4939 struct occr
*occr
= expr
->antic_occr
;
4943 /* Find the right occurrence of this expression. */
4944 while (BLOCK_FOR_INSN (occr
->insn
) != dominated
&& occr
)
4949 set
= single_set (insn
);
4952 /* Create a pseudo-reg to store the result of reaching
4953 expressions into. Get the mode for the new pseudo
4954 from the mode of the original destination pseudo. */
4955 if (expr
->reaching_reg
== NULL
)
4957 = gen_reg_rtx (GET_MODE (SET_DEST (set
)));
4959 gcse_emit_move_after (expr
->reaching_reg
, SET_DEST (set
), insn
);
4961 occr
->deleted_p
= 1;
4962 if (!insn_inserted_p
)
4964 insert_insn_end_bb (index_map
[i
], bb
, 0);
4965 insn_inserted_p
= 1;
4977 /* Top level routine to perform one code hoisting (aka unification) pass
4979 Return nonzero if a change was made. */
4982 one_code_hoisting_pass (void)
4986 alloc_hash_table (max_cuid
, &expr_hash_table
, 0);
4987 compute_hash_table (&expr_hash_table
);
  dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
4991 if (expr_hash_table
.n_elems
> 0)
4993 alloc_code_hoist_mem (last_basic_block
, expr_hash_table
.n_elems
);
4994 compute_code_hoist_data ();
4996 free_code_hoist_mem ();
4999 free_hash_table (&expr_hash_table
);
/* Here we provide the things required to do store motion towards
   the exit.  In order for this to be effective, gcse also needed to
   be taught how to move a load when it is killed only by a store to itself.

	    int i;
	    float a[10];

	    void foo(float scale)
	    {
	      for (i=0; i<10; i++)
		a[i] *= scale;
	    }

   'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
   the load out since it's live around the loop, and stored at the bottom
   of the loop.

     The 'Load Motion' referred to and implemented in this file is
   an enhancement to gcse which when using edge based lcm, recognizes
   this situation and allows gcse to move the load out of the loop.

     Once gcse has hoisted the load, store motion can then push this
   load towards the exit, and we end up with no loads or stores of 'i'
   in the loop.  */
5030 pre_ldst_expr_hash (const void *p
)
5032 int do_not_record_p
= 0;
5033 const struct ls_expr
*x
= p
;
5034 return hash_rtx (x
->pattern
, GET_MODE (x
->pattern
), &do_not_record_p
, NULL
, false);
5038 pre_ldst_expr_eq (const void *p1
, const void *p2
)
5040 const struct ls_expr
*ptr1
= p1
, *ptr2
= p2
;
5041 return expr_equiv_p (ptr1
->pattern
, ptr2
->pattern
);
5044 /* This will search the ldst list for a matching expression. If it
5045 doesn't find one, we create one and initialize it. */
5047 static struct ls_expr
*
5050 int do_not_record_p
= 0;
5051 struct ls_expr
* ptr
;
5056 hash
= hash_rtx (x
, GET_MODE (x
), &do_not_record_p
,
5057 NULL
, /*have_reg_qty=*/false);
5060 slot
= htab_find_slot_with_hash (pre_ldst_table
, &e
, hash
, INSERT
);
5062 return (struct ls_expr
*)*slot
;
5064 ptr
= XNEW (struct ls_expr
);
5066 ptr
->next
= pre_ldst_mems
;
5069 ptr
->pattern_regs
= NULL_RTX
;
5070 ptr
->loads
= NULL_RTX
;
5071 ptr
->stores
= NULL_RTX
;
5072 ptr
->reaching_reg
= NULL_RTX
;
5075 ptr
->hash_index
= hash
;
5076 pre_ldst_mems
= ptr
;
5082 /* Free up an individual ldst entry. */
5085 free_ldst_entry (struct ls_expr
* ptr
)
5087 free_INSN_LIST_list (& ptr
->loads
);
5088 free_INSN_LIST_list (& ptr
->stores
);
5093 /* Free up all memory associated with the ldst list. */
5096 free_ldst_mems (void)
5099 htab_delete (pre_ldst_table
);
5100 pre_ldst_table
= NULL
;
5102 while (pre_ldst_mems
)
5104 struct ls_expr
* tmp
= pre_ldst_mems
;
5106 pre_ldst_mems
= pre_ldst_mems
->next
;
5108 free_ldst_entry (tmp
);
5111 pre_ldst_mems
= NULL
;
5114 /* Dump debugging info about the ldst list. */
5117 print_ldst_list (FILE * file
)
5119 struct ls_expr
* ptr
;
5121 fprintf (file
, "LDST list: \n");
5123 for (ptr
= first_ls_expr(); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
5125 fprintf (file
, " Pattern (%3d): ", ptr
->index
);
5127 print_rtl (file
, ptr
->pattern
);
5129 fprintf (file
, "\n Loads : ");
5132 print_rtl (file
, ptr
->loads
);
5134 fprintf (file
, "(nil)");
5136 fprintf (file
, "\n Stores : ");
5139 print_rtl (file
, ptr
->stores
);
5141 fprintf (file
, "(nil)");
5143 fprintf (file
, "\n\n");
5146 fprintf (file
, "\n");
5149 /* Returns 1 if X is in the list of ldst only expressions. */
5151 static struct ls_expr
*
5152 find_rtx_in_ldst (rtx x
)
5156 if (!pre_ldst_table
)
5159 slot
= htab_find_slot (pre_ldst_table
, &e
, NO_INSERT
);
5160 if (!slot
|| ((struct ls_expr
*)*slot
)->invalid
)
5165 /* Assign each element of the list of mems a monotonically increasing value. */
5168 enumerate_ldsts (void)
5170 struct ls_expr
* ptr
;
5173 for (ptr
= pre_ldst_mems
; ptr
!= NULL
; ptr
= ptr
->next
)
5179 /* Return first item in the list. */
5181 static inline struct ls_expr
*
5182 first_ls_expr (void)
5184 return pre_ldst_mems
;
5187 /* Return the next item in the list after the specified one. */
5189 static inline struct ls_expr
*
5190 next_ls_expr (struct ls_expr
* ptr
)
5195 /* Load Motion for loads which only kill themselves. */
5197 /* Return true if x is a simple MEM operation, with no registers or
5198 side effects. These are the types of loads we consider for the
5199 ld_motion list, otherwise we let the usual aliasing take care of it. */
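/* An illustrative sketch (the RTL shown is schematic, not taken from a real
   dump): the kind of MEM accepted here is a plain, non-volatile, non-BLKmode
   reference such as a load from a global,

     (mem:SI (symbol_ref:SI ("i")))

   whereas a volatile access, a BLKmode access, or a reference whose address
   mentions the stack pointer (an argument slot) is left to the usual
   aliasing machinery.  */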
5207 if (MEM_VOLATILE_P (x
))
5210 if (GET_MODE (x
) == BLKmode
)
5213 /* If we are handling exceptions, we must be careful with memory references
5214 that may trap. If we are not, the behavior is undefined, so we may just
5216 if (flag_non_call_exceptions
&& may_trap_p (x
))
5219 if (side_effects_p (x
))
5222 /* Do not consider function arguments passed on stack. */
5223 if (reg_mentioned_p (stack_pointer_rtx
, x
))
5226 if (flag_float_store
&& FLOAT_MODE_P (GET_MODE (x
)))
/* Make sure there isn't a buried reference in this pattern anywhere.
   If there is, invalidate the entry for it since we're not capable
   of fixing it up just yet.  We have to be sure we know about ALL
   loads since the aliasing code will allow all entries in the
   ld_motion list to not alias themselves.  If we miss a load, we will get
   the wrong value since gcse might common it and we won't know to
5241 invalidate_any_buried_refs (rtx x
)
5245 struct ls_expr
* ptr
;
5247 /* Invalidate it in the list. */
5248 if (MEM_P (x
) && simple_mem (x
))
5250 ptr
= ldst_entry (x
);
5254 /* Recursively process the insn. */
5255 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
5257 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
5260 invalidate_any_buried_refs (XEXP (x
, i
));
5261 else if (fmt
[i
] == 'E')
5262 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
5263 invalidate_any_buried_refs (XVECEXP (x
, i
, j
));
/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
   being defined as MEM loads and stores to symbols, with no side effects
   and no registers in the expression.  For a MEM destination, we also
   check that the insn is still valid if we replace the destination with a
   REG, as is done in update_ld_motion_stores.  If there are any uses/defs
   which don't match these criteria, they are invalidated and trimmed out
5276 compute_ld_motion_mems (void)
5278 struct ls_expr
* ptr
;
5282 pre_ldst_mems
= NULL
;
5283 pre_ldst_table
= htab_create (13, pre_ldst_expr_hash
,
5284 pre_ldst_expr_eq
, NULL
);
5288 FOR_BB_INSNS (bb
, insn
)
5292 if (GET_CODE (PATTERN (insn
)) == SET
)
5294 rtx src
= SET_SRC (PATTERN (insn
));
5295 rtx dest
= SET_DEST (PATTERN (insn
));
5297 /* Check for a simple LOAD... */
5298 if (MEM_P (src
) && simple_mem (src
))
5300 ptr
= ldst_entry (src
);
5302 ptr
->loads
= alloc_INSN_LIST (insn
, ptr
->loads
);
5308 /* Make sure there isn't a buried load somewhere. */
5309 invalidate_any_buried_refs (src
);
	      /* Check for stores.  Don't worry about aliased ones, they
		 will block any movement we might do later.  We only care
		 about this exact pattern since those are the only
		 circumstances in which we will ignore the aliasing info.  */
5316 if (MEM_P (dest
) && simple_mem (dest
))
5318 ptr
= ldst_entry (dest
);
5321 && GET_CODE (src
) != ASM_OPERANDS
5322 /* Check for REG manually since want_to_gcse_p
5323 returns 0 for all REGs. */
5324 && can_assign_to_reg_p (src
))
5325 ptr
->stores
= alloc_INSN_LIST (insn
, ptr
->stores
);
5331 invalidate_any_buried_refs (PATTERN (insn
));
5337 /* Remove any references that have been either invalidated or are not in the
5338 expression list for pre gcse. */
5341 trim_ld_motion_mems (void)
5343 struct ls_expr
* * last
= & pre_ldst_mems
;
5344 struct ls_expr
* ptr
= pre_ldst_mems
;
5350 /* Delete if entry has been made invalid. */
5353 /* Delete if we cannot find this mem in the expression list. */
5354 unsigned int hash
= ptr
->hash_index
% expr_hash_table
.size
;
5356 for (expr
= expr_hash_table
.table
[hash
];
5358 expr
= expr
->next_same_hash
)
5359 if (expr_equiv_p (expr
->expr
, ptr
->pattern
))
5363 expr
= (struct expr
*) 0;
5367 /* Set the expression field if we are keeping it. */
5375 htab_remove_elt_with_hash (pre_ldst_table
, ptr
, ptr
->hash_index
);
5376 free_ldst_entry (ptr
);
5381 /* Show the world what we've found. */
5382 if (dump_file
&& pre_ldst_mems
!= NULL
)
5383 print_ldst_list (dump_file
);
/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */
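/* An illustrative sketch (register numbers invented for illustration):
   at the RTL level the store rewrite described above turns

     (set (mem:SI (symbol_ref "i")) (reg:SI 60))

   into the pair

     (set (reg:SI 95) (reg:SI 60))                ; reg 95 = reaching_reg
     (set (mem:SI (symbol_ref "i")) (reg:SI 95))

   so later loads of "i" that were rewritten to use reg 95 still observe
   the stored value.  */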
5394 update_ld_motion_stores (struct expr
* expr
)
5396 struct ls_expr
* mem_ptr
;
5398 if ((mem_ptr
= find_rtx_in_ldst (expr
->expr
)))
      /* We can try to find just the REACHED stores, but it shouldn't
	 matter to set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */
5404 /* We replace (set mem expr) with (set reg expr) (set mem reg)
5405 where reg is the reaching reg used in the load. We checked in
5406 compute_ld_motion_mems that we can replace (set mem expr) with
5407 (set reg expr) in that insn. */
5408 rtx list
= mem_ptr
->stores
;
5410 for ( ; list
!= NULL_RTX
; list
= XEXP (list
, 1))
5412 rtx insn
= XEXP (list
, 0);
5413 rtx pat
= PATTERN (insn
);
5414 rtx src
= SET_SRC (pat
);
5415 rtx reg
= expr
->reaching_reg
;
5418 /* If we've already copied it, continue. */
5419 if (expr
->reaching_reg
== src
)
5424 fprintf (dump_file
, "PRE: store updated with reaching reg ");
5425 print_rtl (dump_file
, expr
->reaching_reg
);
5426 fprintf (dump_file
, ":\n ");
5427 print_inline_rtx (dump_file
, insn
, 8);
5428 fprintf (dump_file
, "\n");
5431 copy
= gen_move_insn ( reg
, copy_rtx (SET_SRC (pat
)));
5432 new = emit_insn_before (copy
, insn
);
5433 record_one_set (REGNO (reg
), new);
5434 SET_SRC (pat
) = reg
;
5436 /* un-recognize this pattern since it's probably different now. */
5437 INSN_CODE (insn
) = -1;
5438 gcse_create_count
++;
5443 /* Store motion code. */
5445 #define ANTIC_STORE_LIST(x) ((x)->loads)
5446 #define AVAIL_STORE_LIST(x) ((x)->stores)
5447 #define LAST_AVAIL_CHECK_FAILURE(x) ((x)->reaching_reg)
/* This is used to communicate the target bitvector we want to use in the
   reg_set_info routine when called via the note_stores mechanism.  */
static int * regvec;

/* And current insn, for the same routine.  */
static rtx compute_store_table_current_insn;

/* Used in computing the reverse edge graph bit vectors.  */
static sbitmap * st_antloc;

/* Global holding the number of store expressions we are dealing with.  */
static int num_stores;
5462 /* Checks to set if we need to mark a register set. Called from
5466 reg_set_info (rtx dest
, rtx setter ATTRIBUTE_UNUSED
,
5469 sbitmap bb_reg
= data
;
5471 if (GET_CODE (dest
) == SUBREG
)
5472 dest
= SUBREG_REG (dest
);
5476 regvec
[REGNO (dest
)] = INSN_UID (compute_store_table_current_insn
);
5478 SET_BIT (bb_reg
, REGNO (dest
));
5482 /* Clear any mark that says that this insn sets dest. Called from
5486 reg_clear_last_set (rtx dest
, rtx setter ATTRIBUTE_UNUSED
,
5489 int *dead_vec
= data
;
5491 if (GET_CODE (dest
) == SUBREG
)
5492 dest
= SUBREG_REG (dest
);
5495 dead_vec
[REGNO (dest
)] == INSN_UID (compute_store_table_current_insn
))
5496 dead_vec
[REGNO (dest
)] = 0;
5499 /* Return zero if some of the registers in list X are killed
5500 due to set of registers in bitmap REGS_SET. */
5503 store_ops_ok (rtx x
, int *regs_set
)
5507 for (; x
; x
= XEXP (x
, 1))
5510 if (regs_set
[REGNO(reg
)])
5517 /* Returns a list of registers mentioned in X. */
5519 extract_mentioned_regs (rtx x
)
5521 return extract_mentioned_regs_helper (x
, NULL_RTX
);
5524 /* Helper for extract_mentioned_regs; ACCUM is used to accumulate used
5527 extract_mentioned_regs_helper (rtx x
, rtx accum
)
5533 /* Repeat is used to turn tail-recursion into iteration. */
5539 code
= GET_CODE (x
);
5543 return alloc_EXPR_LIST (0, x
, accum
);
5553 /* We do not run this function with arguments having side effects. */
5572 i
= GET_RTX_LENGTH (code
) - 1;
5573 fmt
= GET_RTX_FORMAT (code
);
5579 rtx tem
= XEXP (x
, i
);
5581 /* If we are about to do the last recursive call
5582 needed at this level, change it into iteration. */
5589 accum
= extract_mentioned_regs_helper (tem
, accum
);
5591 else if (fmt
[i
] == 'E')
5595 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
5596 accum
= extract_mentioned_regs_helper (XVECEXP (x
, i
, j
), accum
);
/* Determine whether INSN is a MEM store pattern that we will consider moving.
   REGS_SET_BEFORE is bitmap of registers set before (and including) the
   current insn, REGS_SET_AFTER is bitmap of registers set after (and
   including) the insn in this basic block.  We must be passing through BB
   from head to end, as we are using this fact to speed things up.

   The results are stored this way:

   -- the first anticipatable expression is added into ANTIC_STORE_LIST
   -- if the processed expression is not anticipatable, NULL_RTX is added
      there instead, so that we can use it as an indicator that no further
      expression of this type may be anticipatable
   -- if the expression is available, it is added as head of AVAIL_STORE_LIST;
      consequently, all of them but this head are dead and may be deleted.
   -- if the expression is not available, the insn that causes it to fail to
      be available is stored in reaching_reg.

   Things are complicated a bit by the fact that there already may be stores
   to the same MEM from other blocks; also the caller must take care of the
   necessary cleanup of the temporary markers after the end of the basic
   block.  */
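/* An illustrative sketch (names invented for illustration): within one
   basic block,

     *p = x;             ; anticipatable: first store, nothing above it
                         ;   reads or clobbers *p or its address regs
     y = f ();           ; a call that may read *p
     *p = z;             ; available: last store, nothing below kills it

   the first store could be moved to the start of the block, and the last
   one still reaches the end of the block; the call in between kills the
   availability of the first store and the anticipatability of the second.  */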
5626 find_moveable_store (rtx insn
, int *regs_set_before
, int *regs_set_after
)
5628 struct ls_expr
* ptr
;
5630 int check_anticipatable
, check_available
;
5631 basic_block bb
= BLOCK_FOR_INSN (insn
);
5633 set
= single_set (insn
);
5637 dest
= SET_DEST (set
);
5639 if (! MEM_P (dest
) || MEM_VOLATILE_P (dest
)
5640 || GET_MODE (dest
) == BLKmode
)
5643 if (side_effects_p (dest
))
5646 /* If we are handling exceptions, we must be careful with memory references
5647 that may trap. If we are not, the behavior is undefined, so we may just
5649 if (flag_non_call_exceptions
&& may_trap_p (dest
))
5652 /* Even if the destination cannot trap, the source may. In this case we'd
5653 need to handle updating the REG_EH_REGION note. */
5654 if (find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
))
  /* Make sure that the SET_SRC of this store insn can be assigned to
     a register, or we will fail later on in replace_store_insn, which
     assumes that we can do this.  But sometimes the target machine has
     oddities like MEM read-modify-write instructions.  See for example
5662 if (!can_assign_to_reg_p (SET_SRC (set
)))
5665 ptr
= ldst_entry (dest
);
5666 if (!ptr
->pattern_regs
)
5667 ptr
->pattern_regs
= extract_mentioned_regs (dest
);
5669 /* Do not check for anticipatability if we either found one anticipatable
5670 store already, or tested for one and found out that it was killed. */
5671 check_anticipatable
= 0;
5672 if (!ANTIC_STORE_LIST (ptr
))
5673 check_anticipatable
= 1;
5676 tmp
= XEXP (ANTIC_STORE_LIST (ptr
), 0);
5678 && BLOCK_FOR_INSN (tmp
) != bb
)
5679 check_anticipatable
= 1;
5681 if (check_anticipatable
)
5683 if (store_killed_before (dest
, ptr
->pattern_regs
, insn
, bb
, regs_set_before
))
5687 ANTIC_STORE_LIST (ptr
) = alloc_INSN_LIST (tmp
,
5688 ANTIC_STORE_LIST (ptr
));
5691 /* It is not necessary to check whether store is available if we did
5692 it successfully before; if we failed before, do not bother to check
5693 until we reach the insn that caused us to fail. */
5694 check_available
= 0;
5695 if (!AVAIL_STORE_LIST (ptr
))
5696 check_available
= 1;
5699 tmp
= XEXP (AVAIL_STORE_LIST (ptr
), 0);
5700 if (BLOCK_FOR_INSN (tmp
) != bb
)
5701 check_available
= 1;
5703 if (check_available
)
      /* Check that we have already reached the insn at which the check
	 failed last time.  */
5707 if (LAST_AVAIL_CHECK_FAILURE (ptr
))
5709 for (tmp
= BB_END (bb
);
5710 tmp
!= insn
&& tmp
!= LAST_AVAIL_CHECK_FAILURE (ptr
);
5711 tmp
= PREV_INSN (tmp
))
5714 check_available
= 0;
5717 check_available
= store_killed_after (dest
, ptr
->pattern_regs
, insn
,
5719 &LAST_AVAIL_CHECK_FAILURE (ptr
));
5721 if (!check_available
)
5722 AVAIL_STORE_LIST (ptr
) = alloc_INSN_LIST (insn
, AVAIL_STORE_LIST (ptr
));
5725 /* Find available and anticipatable stores. */
5728 compute_store_table (void)
5734 int *last_set_in
, *already_set
;
5735 struct ls_expr
* ptr
, **prev_next_ptr_ptr
;
5737 max_gcse_regno
= max_reg_num ();
5739 reg_set_in_block
= sbitmap_vector_alloc (last_basic_block
,
5741 sbitmap_vector_zero (reg_set_in_block
, last_basic_block
);
5743 pre_ldst_table
= htab_create (13, pre_ldst_expr_hash
,
5744 pre_ldst_expr_eq
, NULL
);
5745 last_set_in
= XCNEWVEC (int, max_gcse_regno
);
5746 already_set
= XNEWVEC (int, max_gcse_regno
);
5748 /* Find all the stores we care about. */
5751 /* First compute the registers set in this block. */
5752 regvec
= last_set_in
;
5754 FOR_BB_INSNS (bb
, insn
)
5756 if (! INSN_P (insn
))
5761 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5762 if (TEST_HARD_REG_BIT (regs_invalidated_by_call
, regno
))
5764 last_set_in
[regno
] = INSN_UID (insn
);
5765 SET_BIT (reg_set_in_block
[bb
->index
], regno
);
5769 pat
= PATTERN (insn
);
5770 compute_store_table_current_insn
= insn
;
5771 note_stores (pat
, reg_set_info
, reg_set_in_block
[bb
->index
]);
5774 /* Now find the stores. */
5775 memset (already_set
, 0, sizeof (int) * max_gcse_regno
);
5776 regvec
= already_set
;
5777 FOR_BB_INSNS (bb
, insn
)
5779 if (! INSN_P (insn
))
5784 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5785 if (TEST_HARD_REG_BIT (regs_invalidated_by_call
, regno
))
5786 already_set
[regno
] = 1;
5789 pat
= PATTERN (insn
);
5790 note_stores (pat
, reg_set_info
, NULL
);
5792 /* Now that we've marked regs, look for stores. */
5793 find_moveable_store (insn
, already_set
, last_set_in
);
5795 /* Unmark regs that are no longer set. */
5796 compute_store_table_current_insn
= insn
;
5797 note_stores (pat
, reg_clear_last_set
, last_set_in
);
5800 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5801 if (TEST_HARD_REG_BIT (regs_invalidated_by_call
, regno
)
5802 && last_set_in
[regno
] == INSN_UID (insn
))
5803 last_set_in
[regno
] = 0;
5807 #ifdef ENABLE_CHECKING
5808 /* last_set_in should now be all-zero. */
5809 for (regno
= 0; regno
< max_gcse_regno
; regno
++)
5810 gcc_assert (!last_set_in
[regno
]);
5813 /* Clear temporary marks. */
5814 for (ptr
= first_ls_expr (); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
5816 LAST_AVAIL_CHECK_FAILURE(ptr
) = NULL_RTX
;
5817 if (ANTIC_STORE_LIST (ptr
)
5818 && (tmp
= XEXP (ANTIC_STORE_LIST (ptr
), 0)) == NULL_RTX
)
5819 ANTIC_STORE_LIST (ptr
) = XEXP (ANTIC_STORE_LIST (ptr
), 1);
5823 /* Remove the stores that are not available anywhere, as there will
5824 be no opportunity to optimize them. */
5825 for (ptr
= pre_ldst_mems
, prev_next_ptr_ptr
= &pre_ldst_mems
;
5827 ptr
= *prev_next_ptr_ptr
)
5829 if (!AVAIL_STORE_LIST (ptr
))
5831 *prev_next_ptr_ptr
= ptr
->next
;
5832 htab_remove_elt_with_hash (pre_ldst_table
, ptr
, ptr
->hash_index
);
5833 free_ldst_entry (ptr
);
5836 prev_next_ptr_ptr
= &ptr
->next
;
5839 ret
= enumerate_ldsts ();
5843 fprintf (dump_file
, "ST_avail and ST_antic (shown under loads..)\n");
5844 print_ldst_list (dump_file
);
5852 /* Check to see if the load X is aliased with STORE_PATTERN.
5853 AFTER is true if we are checking the case when STORE_PATTERN occurs
5857 load_kills_store (rtx x
, rtx store_pattern
, int after
)
5860 return anti_dependence (x
, store_pattern
);
5862 return true_dependence (store_pattern
, GET_MODE (store_pattern
), x
,
5866 /* Go through the entire insn X, looking for any loads which might alias
5867 STORE_PATTERN. Return true if found.
5868 AFTER is true if we are checking the case when STORE_PATTERN occurs
5869 after the insn X. */
5872 find_loads (rtx x
, rtx store_pattern
, int after
)
5881 if (GET_CODE (x
) == SET
)
5886 if (load_kills_store (x
, store_pattern
, after
))
5890 /* Recursively process the insn. */
5891 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
5893 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0 && !ret
; i
--)
5896 ret
|= find_loads (XEXP (x
, i
), store_pattern
, after
);
5897 else if (fmt
[i
] == 'E')
5898 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
5899 ret
|= find_loads (XVECEXP (x
, i
, j
), store_pattern
, after
);
5904 /* Check if INSN kills the store pattern X (is aliased with it).
5905 AFTER is true if we are checking the case when store X occurs
5906 after the insn. Return true if it does. */
5909 store_killed_in_insn (rtx x
, rtx x_regs
, rtx insn
, int after
)
5911 rtx reg
, base
, note
;
5918 /* A normal or pure call might read from pattern,
5919 but a const call will not. */
5920 if (! CONST_OR_PURE_CALL_P (insn
) || pure_call_p (insn
))
      /* But even a const call reads its parameters.  Check whether the
	 base of some of the registers used in the mem is the stack
	 pointer.  */
5925 for (reg
= x_regs
; reg
; reg
= XEXP (reg
, 1))
5927 base
= find_base_term (XEXP (reg
, 0));
5929 || (GET_CODE (base
) == ADDRESS
5930 && GET_MODE (base
) == Pmode
5931 && XEXP (base
, 0) == stack_pointer_rtx
))
5938 if (GET_CODE (PATTERN (insn
)) == SET
)
5940 rtx pat
= PATTERN (insn
);
5941 rtx dest
= SET_DEST (pat
);
5943 if (GET_CODE (dest
) == ZERO_EXTRACT
)
5944 dest
= XEXP (dest
, 0);
5946 /* Check for memory stores to aliased objects. */
5948 && !expr_equiv_p (dest
, x
))
5952 if (output_dependence (dest
, x
))
5957 if (output_dependence (x
, dest
))
5961 if (find_loads (SET_SRC (pat
), x
, after
))
5964 else if (find_loads (PATTERN (insn
), x
, after
))
5967 /* If this insn has a REG_EQUAL or REG_EQUIV note referencing a memory
5968 location aliased with X, then this insn kills X. */
5969 note
= find_reg_equal_equiv_note (insn
);
5972 note
= XEXP (note
, 0);
5974 /* However, if the note represents a must alias rather than a may
5975 alias relationship, then it does not kill X. */
5976 if (expr_equiv_p (note
, x
))
5979 /* See if there are any aliased loads in the note. */
5980 return find_loads (note
, x
, after
);
/* Returns true if the expression X is loaded or clobbered on or after INSN
   within basic block BB.  REGS_SET_AFTER is bitmap of registers set in
   or after the insn.  X_REGS is list of registers mentioned in X.  If the
   store is killed, return in FAIL_INSN the last insn in which it occurs.  */
5989 store_killed_after (rtx x
, rtx x_regs
, rtx insn
, basic_block bb
,
5990 int *regs_set_after
, rtx
*fail_insn
)
5992 rtx last
= BB_END (bb
), act
;
5994 if (!store_ops_ok (x_regs
, regs_set_after
))
5996 /* We do not know where it will happen. */
5998 *fail_insn
= NULL_RTX
;
6002 /* Scan from the end, so that fail_insn is determined correctly. */
6003 for (act
= last
; act
!= PREV_INSN (insn
); act
= PREV_INSN (act
))
6004 if (store_killed_in_insn (x
, x_regs
, act
, false))
6014 /* Returns true if the expression X is loaded or clobbered on or before INSN
6015 within basic block BB. X_REGS is list of registers mentioned in X.
6016 REGS_SET_BEFORE is bitmap of registers set before or in this insn. */
6018 store_killed_before (rtx x
, rtx x_regs
, rtx insn
, basic_block bb
,
6019 int *regs_set_before
)
6021 rtx first
= BB_HEAD (bb
);
6023 if (!store_ops_ok (x_regs
, regs_set_before
))
6026 for ( ; insn
!= PREV_INSN (first
); insn
= PREV_INSN (insn
))
6027 if (store_killed_in_insn (x
, x_regs
, insn
, true))
6033 /* Fill in available, anticipatable, transparent and kill vectors in
6034 STORE_DATA, based on lists of available and anticipatable stores. */
6036 build_store_vectors (void)
6039 int *regs_set_in_block
;
6041 struct ls_expr
* ptr
;
6044 /* Build the gen_vector. This is any store in the table which is not killed
6045 by aliasing later in its block. */
6046 ae_gen
= sbitmap_vector_alloc (last_basic_block
, num_stores
);
6047 sbitmap_vector_zero (ae_gen
, last_basic_block
);
6049 st_antloc
= sbitmap_vector_alloc (last_basic_block
, num_stores
);
6050 sbitmap_vector_zero (st_antloc
, last_basic_block
);
6052 for (ptr
= first_ls_expr (); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
6054 for (st
= AVAIL_STORE_LIST (ptr
); st
!= NULL
; st
= XEXP (st
, 1))
6056 insn
= XEXP (st
, 0);
6057 bb
= BLOCK_FOR_INSN (insn
);
6059 /* If we've already seen an available expression in this block,
6060 we can delete this one (It occurs earlier in the block). We'll
6061 copy the SRC expression to an unused register in case there
6062 are any side effects. */
6063 if (TEST_BIT (ae_gen
[bb
->index
], ptr
->index
))
6065 rtx r
= gen_reg_rtx (GET_MODE (ptr
->pattern
));
6067 fprintf (dump_file
, "Removing redundant store:\n");
6068 replace_store_insn (r
, XEXP (st
, 0), bb
, ptr
);
6071 SET_BIT (ae_gen
[bb
->index
], ptr
->index
);
6074 for (st
= ANTIC_STORE_LIST (ptr
); st
!= NULL
; st
= XEXP (st
, 1))
6076 insn
= XEXP (st
, 0);
6077 bb
= BLOCK_FOR_INSN (insn
);
6078 SET_BIT (st_antloc
[bb
->index
], ptr
->index
);
6082 ae_kill
= sbitmap_vector_alloc (last_basic_block
, num_stores
);
6083 sbitmap_vector_zero (ae_kill
, last_basic_block
);
6085 transp
= sbitmap_vector_alloc (last_basic_block
, num_stores
);
6086 sbitmap_vector_zero (transp
, last_basic_block
);
6087 regs_set_in_block
= XNEWVEC (int, max_gcse_regno
);
6091 for (regno
= 0; regno
< max_gcse_regno
; regno
++)
6092 regs_set_in_block
[regno
] = TEST_BIT (reg_set_in_block
[bb
->index
], regno
);
6094 for (ptr
= first_ls_expr (); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
6096 if (store_killed_after (ptr
->pattern
, ptr
->pattern_regs
, BB_HEAD (bb
),
6097 bb
, regs_set_in_block
, NULL
))
6099 /* It should not be necessary to consider the expression
6100 killed if it is both anticipatable and available. */
6101 if (!TEST_BIT (st_antloc
[bb
->index
], ptr
->index
)
6102 || !TEST_BIT (ae_gen
[bb
->index
], ptr
->index
))
6103 SET_BIT (ae_kill
[bb
->index
], ptr
->index
);
6106 SET_BIT (transp
[bb
->index
], ptr
->index
);
6110 free (regs_set_in_block
);
6114 dump_sbitmap_vector (dump_file
, "st_antloc", "", st_antloc
, last_basic_block
);
6115 dump_sbitmap_vector (dump_file
, "st_kill", "", ae_kill
, last_basic_block
);
6116 dump_sbitmap_vector (dump_file
, "Transpt", "", transp
, last_basic_block
);
6117 dump_sbitmap_vector (dump_file
, "st_avloc", "", ae_gen
, last_basic_block
);
6121 /* Insert an instruction at the beginning of a basic block, and update
6122 the BB_HEAD if needed. */
6125 insert_insn_start_bb (rtx insn
, basic_block bb
)
6127 /* Insert at start of successor block. */
6128 rtx prev
= PREV_INSN (BB_HEAD (bb
));
6129 rtx before
= BB_HEAD (bb
);
6132 if (! LABEL_P (before
)
6133 && (! NOTE_P (before
)
6134 || NOTE_LINE_NUMBER (before
) != NOTE_INSN_BASIC_BLOCK
))
6137 if (prev
== BB_END (bb
))
6139 before
= NEXT_INSN (before
);
6142 insn
= emit_insn_after_noloc (insn
, prev
);
6146 fprintf (dump_file
, "STORE_MOTION insert store at start of BB %d:\n",
6148 print_inline_rtx (dump_file
, insn
, 6);
6149 fprintf (dump_file
, "\n");
6153 /* This routine will insert a store on an edge. EXPR is the ldst entry for
6154 the memory reference, and E is the edge to insert it on. Returns nonzero
6155 if an edge insertion was performed. */
6158 insert_store (struct ls_expr
* expr
, edge e
)
  /* We did all the deletes before this insert, so if we didn't delete a
     store, then we haven't set the reaching reg yet either.  */
6167 if (expr
->reaching_reg
== NULL_RTX
)
6170 if (e
->flags
& EDGE_FAKE
)
6173 reg
= expr
->reaching_reg
;
6174 insn
= gen_move_insn (copy_rtx (expr
->pattern
), reg
);
6176 /* If we are inserting this expression on ALL predecessor edges of a BB,
6177 insert it at the start of the BB, and reset the insert bits on the other
6178 edges so we don't try to insert it on the other edges. */
6180 FOR_EACH_EDGE (tmp
, ei
, e
->dest
->preds
)
6181 if (!(tmp
->flags
& EDGE_FAKE
))
6183 int index
= EDGE_INDEX (edge_list
, tmp
->src
, tmp
->dest
);
6185 gcc_assert (index
!= EDGE_INDEX_NO_EDGE
);
6186 if (! TEST_BIT (pre_insert_map
[index
], expr
->index
))
6190 /* If tmp is NULL, we found an insertion on every edge, blank the
6191 insertion vector for these edges, and insert at the start of the BB. */
6192 if (!tmp
&& bb
!= EXIT_BLOCK_PTR
)
6194 FOR_EACH_EDGE (tmp
, ei
, e
->dest
->preds
)
6196 int index
= EDGE_INDEX (edge_list
, tmp
->src
, tmp
->dest
);
6197 RESET_BIT (pre_insert_map
[index
], expr
->index
);
6199 insert_insn_start_bb (insn
, bb
);
  /* We can't put stores in the front of blocks pointed to by abnormal
     edges since that may put a store where one didn't use to be.  */
6205 gcc_assert (!(e
->flags
& EDGE_ABNORMAL
));
6207 insert_insn_on_edge (insn
, e
);
6211 fprintf (dump_file
, "STORE_MOTION insert insn on edge (%d, %d):\n",
6212 e
->src
->index
, e
->dest
->index
);
6213 print_inline_rtx (dump_file
, insn
, 6);
6214 fprintf (dump_file
, "\n");
6220 /* Remove any REG_EQUAL or REG_EQUIV notes containing a reference to the
6221 memory location in SMEXPR set in basic block BB.
6223 This could be rather expensive. */
6226 remove_reachable_equiv_notes (basic_block bb
, struct ls_expr
*smexpr
)
6228 edge_iterator
*stack
, ei
;
6231 sbitmap visited
= sbitmap_alloc (last_basic_block
);
6232 rtx last
, insn
, note
;
6233 rtx mem
= smexpr
->pattern
;
6235 stack
= XNEWVEC (edge_iterator
, n_basic_blocks
);
6237 ei
= ei_start (bb
->succs
);
6239 sbitmap_zero (visited
);
6241 act
= (EDGE_COUNT (ei_container (ei
)) > 0 ? EDGE_I (ei_container (ei
), 0) : NULL
);
6249 sbitmap_free (visited
);
6252 act
= ei_edge (stack
[--sp
]);
6256 if (bb
== EXIT_BLOCK_PTR
6257 || TEST_BIT (visited
, bb
->index
))
6261 act
= (! ei_end_p (ei
)) ? ei_edge (ei
) : NULL
;
6264 SET_BIT (visited
, bb
->index
);
6266 if (TEST_BIT (st_antloc
[bb
->index
], smexpr
->index
))
6268 for (last
= ANTIC_STORE_LIST (smexpr
);
6269 BLOCK_FOR_INSN (XEXP (last
, 0)) != bb
;
6270 last
= XEXP (last
, 1))
6272 last
= XEXP (last
, 0);
6275 last
= NEXT_INSN (BB_END (bb
));
6277 for (insn
= BB_HEAD (bb
); insn
!= last
; insn
= NEXT_INSN (insn
))
6280 note
= find_reg_equal_equiv_note (insn
);
6281 if (!note
|| !expr_equiv_p (XEXP (note
, 0), mem
))
6285 fprintf (dump_file
, "STORE_MOTION drop REG_EQUAL note at insn %d:\n",
6287 remove_note (insn
, note
);
6292 act
= (! ei_end_p (ei
)) ? ei_edge (ei
) : NULL
;
6294 if (EDGE_COUNT (bb
->succs
) > 0)
6298 ei
= ei_start (bb
->succs
);
6299 act
= (EDGE_COUNT (ei_container (ei
)) > 0 ? EDGE_I (ei_container (ei
), 0) : NULL
);
6304 /* This routine will replace a store with a SET to a specified register. */
6307 replace_store_insn (rtx reg
, rtx del
, basic_block bb
, struct ls_expr
*smexpr
)
6309 rtx insn
, mem
, note
, set
, ptr
, pair
;
6311 mem
= smexpr
->pattern
;
6312 insn
= gen_move_insn (reg
, SET_SRC (single_set (del
)));
6313 insn
= emit_insn_after (insn
, del
);
6318 "STORE_MOTION delete insn in BB %d:\n ", bb
->index
);
6319 print_inline_rtx (dump_file
, del
, 6);
6320 fprintf (dump_file
, "\nSTORE MOTION replaced with insn:\n ");
6321 print_inline_rtx (dump_file
, insn
, 6);
6322 fprintf (dump_file
, "\n");
6325 for (ptr
= ANTIC_STORE_LIST (smexpr
); ptr
; ptr
= XEXP (ptr
, 1))
6326 if (XEXP (ptr
, 0) == del
)
6328 XEXP (ptr
, 0) = insn
;
6332 /* Move the notes from the deleted insn to its replacement, and patch
6333 up the LIBCALL notes. */
6334 REG_NOTES (insn
) = REG_NOTES (del
);
6336 note
= find_reg_note (insn
, REG_RETVAL
, NULL_RTX
);
6339 pair
= XEXP (note
, 0);
6340 note
= find_reg_note (pair
, REG_LIBCALL
, NULL_RTX
);
6341 XEXP (note
, 0) = insn
;
6343 note
= find_reg_note (insn
, REG_LIBCALL
, NULL_RTX
);
6346 pair
= XEXP (note
, 0);
6347 note
= find_reg_note (pair
, REG_RETVAL
, NULL_RTX
);
6348 XEXP (note
, 0) = insn
;
  /* Now we must handle REG_EQUAL notes whose contents are equal to the mem;
     they are no longer accurate if they are reached by this definition,
     so drop them.  */
6356 for (; insn
!= NEXT_INSN (BB_END (bb
)); insn
= NEXT_INSN (insn
))
6359 set
= single_set (insn
);
6362 if (expr_equiv_p (SET_DEST (set
), mem
))
6364 note
= find_reg_equal_equiv_note (insn
);
6365 if (!note
|| !expr_equiv_p (XEXP (note
, 0), mem
))
6369 fprintf (dump_file
, "STORE_MOTION drop REG_EQUAL note at insn %d:\n",
6371 remove_note (insn
, note
);
6373 remove_reachable_equiv_notes (bb
, smexpr
);
6377 /* Delete a store, but copy the value that would have been stored into
6378 the reaching_reg for later storing. */
6381 delete_store (struct ls_expr
* expr
, basic_block bb
)
6385 if (expr
->reaching_reg
== NULL_RTX
)
6386 expr
->reaching_reg
= gen_reg_rtx (GET_MODE (expr
->pattern
));
6388 reg
= expr
->reaching_reg
;
6390 for (i
= AVAIL_STORE_LIST (expr
); i
; i
= XEXP (i
, 1))
6393 if (BLOCK_FOR_INSN (del
) == bb
)
6395 /* We know there is only one since we deleted redundant
6396 ones during the available computation. */
6397 replace_store_insn (reg
, del
, bb
, expr
);
6403 /* Free memory used by store motion. */
6406 free_store_memory (void)
6411 sbitmap_vector_free (ae_gen
);
6413 sbitmap_vector_free (ae_kill
);
6415 sbitmap_vector_free (transp
);
6417 sbitmap_vector_free (st_antloc
);
6419 sbitmap_vector_free (pre_insert_map
);
6421 sbitmap_vector_free (pre_delete_map
);
6422 if (reg_set_in_block
)
6423 sbitmap_vector_free (reg_set_in_block
);
6425 ae_gen
= ae_kill
= transp
= st_antloc
= NULL
;
6426 pre_insert_map
= pre_delete_map
= reg_set_in_block
= NULL
;
6429 /* Perform store motion. Much like gcse, except we move expressions the
6430 other way by looking at the flowgraph in reverse. */
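/* An illustrative sketch (variable names invented for illustration): store
   motion sinks a store that is executed repeatedly to a point where it is
   executed once, keeping the value in the reaching register meanwhile.

     Before:                        After (conceptually):
	for (i = 0; i < n; i++)        for (i = 0; i < n; i++)
	  g = i * 2;    /. store ./      r = i * 2;   /. keep value in a reg ./
                                       g = r;         /. single store at exit ./
*/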
6437 struct ls_expr
* ptr
;
6438 int update_flow
= 0;
6442 fprintf (dump_file
, "before store motion\n");
6443 print_rtl (dump_file
, get_insns ());
6446 init_alias_analysis ();
6448 /* Find all the available and anticipatable stores. */
6449 num_stores
= compute_store_table ();
6450 if (num_stores
== 0)
6452 htab_delete (pre_ldst_table
);
6453 pre_ldst_table
= NULL
;
6454 sbitmap_vector_free (reg_set_in_block
);
6455 end_alias_analysis ();
6459 /* Now compute kill & transp vectors. */
6460 build_store_vectors ();
6461 add_noreturn_fake_exit_edges ();
6462 connect_infinite_loops_to_exit ();
6464 edge_list
= pre_edge_rev_lcm (num_stores
, transp
, ae_gen
,
6465 st_antloc
, ae_kill
, &pre_insert_map
,
6468 /* Now we want to insert the new stores which are going to be needed. */
6469 for (ptr
= first_ls_expr (); ptr
!= NULL
; ptr
= next_ls_expr (ptr
))
6471 /* If any of the edges we have above are abnormal, we can't move this
6473 for (x
= NUM_EDGES (edge_list
) - 1; x
>= 0; x
--)
6474 if (TEST_BIT (pre_insert_map
[x
], ptr
->index
)
6475 && (INDEX_EDGE (edge_list
, x
)->flags
& EDGE_ABNORMAL
))
6480 if (dump_file
!= NULL
)
6482 "Can't replace store %d: abnormal edge from %d to %d\n",
6483 ptr
->index
, INDEX_EDGE (edge_list
, x
)->src
->index
,
6484 INDEX_EDGE (edge_list
, x
)->dest
->index
);
6488 /* Now we want to insert the new stores which are going to be needed. */
6491 if (TEST_BIT (pre_delete_map
[bb
->index
], ptr
->index
))
6492 delete_store (ptr
, bb
);
6494 for (x
= 0; x
< NUM_EDGES (edge_list
); x
++)
6495 if (TEST_BIT (pre_insert_map
[x
], ptr
->index
))
6496 update_flow
|= insert_store (ptr
, INDEX_EDGE (edge_list
, x
));
6500 commit_edge_insertions ();
6502 free_store_memory ();
6503 free_edge_list (edge_list
);
6504 remove_fake_exit_edges ();
6505 end_alias_analysis ();
6509 /* Entry point for jump bypassing optimization pass. */
static int
bypass_jumps (void)
{
  int changed;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (dump_file)
    dump_flow_info (dump_file, dump_flags);

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
      || is_too_expensive (_("jump bypassing disabled")))
    return 0;

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* We need alias.  */
  init_alias_analysis ();

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets ();

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem ();
  changed = one_cprop_pass (MAX_GCSE_PASSES + 2, true, true);
  free_gcse_mem ();

  if (dump_file)
    {
      fprintf (dump_file, "BYPASS of %s: %d basic blocks, ",
               current_function_name (), n_basic_blocks);
      fprintf (dump_file, "%d bytes\n\n", bytes_used);
    }

  obstack_free (&gcse_obstack, NULL);
  free_reg_set_mem ();

  /* We are finished with alias.  */
  end_alias_analysis ();
  allocate_reg_info (max_reg_num (), FALSE, FALSE);

  return changed;
}
/* Return true if the graph is too expensive to optimize.  PASS is the
   optimization about to be performed.  */
static bool
is_too_expensive (const char *pass)
{
  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many
     edges as blocks.  But we do not want to punish small functions
     which have a couple switch statements.  Rather than simply
     threshold the number of blocks, use something with a more
     graceful degradation.  */
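  /* For example, with 1000 basic blocks the test below allows up to
     20000 + 4 * 1000 = 24000 edges, i.e. an average of 24 edges per
     block, far above the ~2 edges per block of a typical cfg.  */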
  if (n_edges > 20000 + n_basic_blocks * 4)
    {
      warning (OPT_Wdisabled_optimization,
               "%s: %d basic blocks and %d edges/basic block",
               pass, n_basic_blocks, n_edges / n_basic_blocks);

      return true;
    }

  /* If allocating memory for the cprop bitmap would take up too much
     storage it's better just to disable the optimization.  */
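  /* The product below is roughly one bit per (pseudo-register, basic
     block) pair: SBITMAP_SET_SIZE rounds the register count up to whole
     SBITMAP_ELT_TYPE words.  As a rough illustration, 10000 blocks and
     100000 pseudos would need on the order of 10^9 bits, i.e. about
     125 MB.  */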
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_reg_num ())
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      warning (OPT_Wdisabled_optimization,
               "%s: %d basic blocks and %d registers",
               pass, n_basic_blocks, max_reg_num ());

      return true;
    }

  return false;
}
static bool
gate_handle_jump_bypass (void)
{
  return optimize > 0 && flag_gcse;
}
/* Perform jump bypassing and control flow optimizations.  */
static unsigned int
rest_of_handle_jump_bypass (void)
{
  cleanup_cfg (CLEANUP_EXPENSIVE);
  reg_scan (get_insns (), max_reg_num ());

  if (bypass_jumps ())
    {
      rebuild_jump_labels (get_insns ());
      cleanup_cfg (CLEANUP_EXPENSIVE);
      delete_trivially_dead_insns (get_insns (), max_reg_num ());
    }
  return 0;
}
struct tree_opt_pass pass_jump_bypass =
{
  "bypass",                             /* name */
  gate_handle_jump_bypass,              /* gate */
  rest_of_handle_jump_bypass,           /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_BYPASS,                            /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func |
  TODO_ggc_collect | TODO_verify_flow,  /* todo_flags_finish */
  'G'                                   /* letter */
};
static bool
gate_handle_gcse (void)
{
  return optimize > 0 && flag_gcse;
}
static unsigned int
rest_of_handle_gcse (void)
{
  int save_csb, save_cfj;
  int tem2 = 0, tem;

  tem = gcse_main (get_insns ());
  rebuild_jump_labels (get_insns ());
  delete_trivially_dead_insns (get_insns (), max_reg_num ());

  save_csb = flag_cse_skip_blocks;
  save_cfj = flag_cse_follow_jumps;
  flag_cse_skip_blocks = flag_cse_follow_jumps = 0;

  /* If -fexpensive-optimizations, re-run CSE to clean up things done
     by gcse.  */
  if (flag_expensive_optimizations)
    {
      timevar_push (TV_CSE);
      reg_scan (get_insns (), max_reg_num ());
      tem2 = cse_main (get_insns (), max_reg_num ());
      purge_all_dead_edges ();
      delete_trivially_dead_insns (get_insns (), max_reg_num ());
      timevar_pop (TV_CSE);
      cse_not_expected = !flag_rerun_cse_after_loop;
    }

  /* If gcse or cse altered any jumps, rerun jump optimizations to clean
     things up.  */
  if (tem || tem2)
    {
      timevar_push (TV_JUMP);
      rebuild_jump_labels (get_insns ());
      delete_dead_jumptables ();
      cleanup_cfg (CLEANUP_EXPENSIVE);
      timevar_pop (TV_JUMP);
    }

  flag_cse_skip_blocks = save_csb;
  flag_cse_follow_jumps = save_cfj;
  return 0;
}
struct tree_opt_pass pass_gcse =
{
  "gcse1",                              /* name */
  gate_handle_gcse,                     /* gate */
  rest_of_handle_gcse,                  /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_GCSE,                              /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func |
  TODO_verify_flow | TODO_ggc_collect,  /* todo_flags_finish */
  'G'                                   /* letter */
};
#include "gt-gcse.h"