* configure: Regenerated.
[official-gcc.git] / gcc / gcse.c
bloba066b36c642abb3bab1a7e7bbae72f533334fb76
1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
28 /* References searched while implementing this.
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
36 communications of the acm, Vol. 22, Num. 2, Feb. 1979
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "function.h"
154 #include "expr.h"
155 #include "except.h"
156 #include "ggc.h"
157 #include "params.h"
158 #include "cselib.h"
159 #include "intl.h"
160 #include "obstack.h"
161 #include "tree-pass.h"
162 #include "hashtab.h"
163 #include "df.h"
164 #include "dbgcnt.h"
165 #include "target.h"
166 #include "gcse.h"
168 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
169 are a superset of those done by classic GCSE.
171 Two passes of copy/constant propagation are done around PRE or hoisting
172 because the first one enables more GCSE and the second one helps to clean
173 up the copies that PRE and HOIST create. This is needed more for PRE than
174 for HOIST because code hoisting will try to use an existing register
175 containing the common subexpression rather than create a new one. This is
176 harder to do for PRE because of the code motion (which HOIST doesn't do).
178 Expressions we are interested in GCSE-ing are of the form
179 (set (pseudo-reg) (expression)).
180 Function want_to_gcse_p says what these are.
182 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
183 This allows PRE to hoist expressions that are expressed in multiple insns,
184 such as complex address calculations (e.g. for PIC code, or loads with a
185 high part and a low part).
187 PRE handles moving invariant expressions out of loops (by treating them as
188 partially redundant).
190 **********************
192 We used to support multiple passes but there are diminishing returns in
193 doing so. The first pass usually makes 90% of the changes that are doable.
194 A second pass can make a few more changes made possible by the first pass.
195 Experiments show any further passes don't make enough changes to justify
196 the expense.
198 A study of spec92 using an unlimited number of passes:
199 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
200 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
201 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
203 It was found doing copy propagation between each pass enables further
204 substitutions.
206 This study was done before expressions in REG_EQUAL notes were added as
207 candidate expressions for optimization, and before the GIMPLE optimizers
208 were added. Probably, multiple passes is even less efficient now than
209 at the time when the study was conducted.
211 PRE is quite expensive in complicated functions because the DFA can take
212 a while to converge. Hence we only perform one pass.
214 **********************
216 The steps for PRE are:
218 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
220 2) Perform the data flow analysis for PRE.
222 3) Delete the redundant instructions
224 4) Insert the required copies [if any] that make the partially
225 redundant instructions fully redundant.
227 5) For other reaching expressions, insert an instruction to copy the value
228 to a newly created pseudo that will reach the redundant instruction.
230 The deletion is done first so that when we do insertions we
231 know which pseudo reg to use.
233 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
234 argue it is not. The number of iterations for the algorithm to converge
235 is typically 2-4 so I don't view it as that expensive (relatively speaking).
237 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
238 we create. To make an expression reach the place where it's redundant,
239 the result of the expression is copied to a new register, and the redundant
240 expression is deleted by replacing it with this new register. Classic GCSE
241 doesn't have this problem as much as it computes the reaching defs of
242 each register in each block and thus can try to use an existing
243 register. */
245 /* GCSE global vars. */
247 struct target_gcse default_target_gcse;
248 #if SWITCHABLE_TARGET
249 struct target_gcse *this_target_gcse = &default_target_gcse;
250 #endif
252 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
253 int flag_rerun_cse_after_global_opts;
255 /* An obstack for our working variables. */
256 static struct obstack gcse_obstack;
258 struct reg_use {rtx reg_rtx; };
260 /* Hash table of expressions. */
262 struct expr
264 /* The expression. */
265 rtx expr;
266 /* Index in the available expression bitmaps. */
267 int bitmap_index;
268 /* Next entry with the same hash. */
269 struct expr *next_same_hash;
270 /* List of anticipatable occurrences in basic blocks in the function.
271 An "anticipatable occurrence" is one that is the first occurrence in the
272 basic block, the operands are not modified in the basic block prior
273 to the occurrence and the output is not used between the start of
274 the block and the occurrence. */
275 struct occr *antic_occr;
276 /* List of available occurrence in basic blocks in the function.
277 An "available occurrence" is one that is the last occurrence in the
278 basic block and the operands are not modified by following statements in
279 the basic block [including this insn]. */
280 struct occr *avail_occr;
281 /* Non-null if the computation is PRE redundant.
282 The value is the newly created pseudo-reg to record a copy of the
283 expression in all the places that reach the redundant copy. */
284 rtx reaching_reg;
285 /* Maximum distance in instructions this expression can travel.
286 We avoid moving simple expressions for more than a few instructions
287 to keep register pressure under control.
288 A value of "0" removes restrictions on how far the expression can
289 travel. */
290 int max_distance;
293 /* Occurrence of an expression.
294 There is one per basic block. If a pattern appears more than once the
295 last appearance is used [or first for anticipatable expressions]. */
297 struct occr
299 /* Next occurrence of this expression. */
300 struct occr *next;
301 /* The insn that computes the expression. */
302 rtx insn;
303 /* Nonzero if this [anticipatable] occurrence has been deleted. */
304 char deleted_p;
305 /* Nonzero if this [available] occurrence has been copied to
306 reaching_reg. */
307 /* ??? This is mutually exclusive with deleted_p, so they could share
308 the same byte. */
309 char copied_p;
312 typedef struct occr *occr_t;
313 DEF_VEC_P (occr_t);
314 DEF_VEC_ALLOC_P (occr_t, heap);
316 /* Expression hash tables.
317 Each hash table is an array of buckets.
318 ??? It is known that if it were an array of entries, structure elements
319 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
320 not clear whether in the final analysis a sufficient amount of memory would
321 be saved as the size of the available expression bitmaps would be larger
322 [one could build a mapping table without holes afterwards though].
323 Someday I'll perform the computation and figure it out. */
325 struct hash_table_d
327 /* The table itself.
328 This is an array of `expr_hash_table_size' elements. */
329 struct expr **table;
331 /* Size of the hash table, in elements. */
332 unsigned int size;
334 /* Number of hash table elements. */
335 unsigned int n_elems;
338 /* Expression hash table. */
339 static struct hash_table_d expr_hash_table;
341 /* This is a list of expressions which are MEMs and will be used by load
342 or store motion.
343 Load motion tracks MEMs which aren't killed by anything except itself,
344 i.e. loads and stores to a single location.
345 We can then allow movement of these MEM refs with a little special
346 allowance. (all stores copy the same value to the reaching reg used
347 for the loads). This means all values used to store into memory must have
348 no side effects so we can re-issue the setter value. */
350 struct ls_expr
352 struct expr * expr; /* Gcse expression reference for LM. */
353 rtx pattern; /* Pattern of this mem. */
354 rtx pattern_regs; /* List of registers mentioned by the mem. */
355 rtx loads; /* INSN list of loads seen. */
356 rtx stores; /* INSN list of stores seen. */
357 struct ls_expr * next; /* Next in the list. */
358 int invalid; /* Invalid for some reason. */
359 int index; /* If it maps to a bitmap index. */
360 unsigned int hash_index; /* Index when in a hash table. */
361 rtx reaching_reg; /* Register to use when re-writing. */
364 /* Head of the list of load/store memory refs. */
365 static struct ls_expr * pre_ldst_mems = NULL;
367 /* Hashtable for the load/store memory refs. */
368 static htab_t pre_ldst_table = NULL;
370 /* Bitmap containing one bit for each register in the program.
371 Used when performing GCSE to track which registers have been set since
372 the start of the basic block. */
373 static regset reg_set_bitmap;
375 /* Array, indexed by basic block number for a list of insns which modify
376 memory within that block. */
377 static VEC (rtx,heap) **modify_mem_list;
378 static bitmap modify_mem_list_set;
380 typedef struct modify_pair_s
382 rtx dest; /* A MEM. */
383 rtx dest_addr; /* The canonical address of `dest'. */
384 } modify_pair;
386 DEF_VEC_O(modify_pair);
387 DEF_VEC_ALLOC_O(modify_pair,heap);
389 /* This array parallels modify_mem_list, except that it stores MEMs
390 being set and their canonicalized memory addresses. */
391 static VEC (modify_pair,heap) **canon_modify_mem_list;
393 /* Bitmap indexed by block numbers to record which blocks contain
394 function calls. */
395 static bitmap blocks_with_calls;
397 /* Various variables for statistics gathering. */
399 /* Memory used in a pass.
400 This isn't intended to be absolutely precise. Its intent is only
401 to keep an eye on memory usage. */
402 static int bytes_used;
404 /* GCSE substitutions made. */
405 static int gcse_subst_count;
406 /* Number of copy instructions created. */
407 static int gcse_create_count;
409 /* Doing code hoisting. */
410 static bool doing_code_hoisting_p = false;
412 /* For available exprs */
413 static sbitmap *ae_kill;
415 static void compute_can_copy (void);
416 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
417 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
418 static void *gcse_alloc (unsigned long);
419 static void alloc_gcse_mem (void);
420 static void free_gcse_mem (void);
421 static void hash_scan_insn (rtx, struct hash_table_d *);
422 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
423 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
424 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
425 static int want_to_gcse_p (rtx, int *);
426 static int oprs_unchanged_p (const_rtx, const_rtx, int);
427 static int oprs_anticipatable_p (const_rtx, const_rtx);
428 static int oprs_available_p (const_rtx, const_rtx);
429 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
430 struct hash_table_d *);
431 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
432 static int expr_equiv_p (const_rtx, const_rtx);
433 static void record_last_reg_set_info (rtx, int);
434 static void record_last_mem_set_info (rtx);
435 static void record_last_set_info (rtx, const_rtx, void *);
436 static void compute_hash_table (struct hash_table_d *);
437 static void alloc_hash_table (struct hash_table_d *);
438 static void free_hash_table (struct hash_table_d *);
439 static void compute_hash_table_work (struct hash_table_d *);
440 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
441 static void compute_transp (const_rtx, int, sbitmap *);
442 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
443 struct hash_table_d *);
444 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
445 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
446 static void canon_list_insert (rtx, const_rtx, void *);
447 static void alloc_pre_mem (int, int);
448 static void free_pre_mem (void);
449 static struct edge_list *compute_pre_data (void);
450 static int pre_expr_reaches_here_p (basic_block, struct expr *,
451 basic_block);
452 static void insert_insn_end_basic_block (struct expr *, basic_block);
453 static void pre_insert_copy_insn (struct expr *, rtx);
454 static void pre_insert_copies (void);
455 static int pre_delete (void);
456 static int pre_gcse (struct edge_list *);
457 static int one_pre_gcse_pass (void);
458 static void add_label_notes (rtx, rtx);
459 static void alloc_code_hoist_mem (int, int);
460 static void free_code_hoist_mem (void);
461 static void compute_code_hoist_vbeinout (void);
462 static void compute_code_hoist_data (void);
463 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
464 int, int *);
465 static int hoist_code (void);
466 static int one_code_hoisting_pass (void);
467 static rtx process_insert_insn (struct expr *);
468 static int pre_edge_insert (struct edge_list *, struct expr **);
469 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
470 basic_block, char *);
471 static struct ls_expr * ldst_entry (rtx);
472 static void free_ldst_entry (struct ls_expr *);
473 static void free_ld_motion_mems (void);
474 static void print_ldst_list (FILE *);
475 static struct ls_expr * find_rtx_in_ldst (rtx);
476 static int simple_mem (const_rtx);
477 static void invalidate_any_buried_refs (rtx);
478 static void compute_ld_motion_mems (void);
479 static void trim_ld_motion_mems (void);
480 static void update_ld_motion_stores (struct expr *);
481 static void clear_modify_mem_tables (void);
482 static void free_modify_mem_tables (void);
483 static rtx gcse_emit_move_after (rtx, rtx, rtx);
484 static bool is_too_expensive (const char *);
486 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
487 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
489 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
490 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
492 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
493 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
495 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
496 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
498 /* Misc. utilities. */
500 #define can_copy \
501 (this_target_gcse->x_can_copy)
502 #define can_copy_init_p \
503 (this_target_gcse->x_can_copy_init_p)
505 /* Compute which modes support reg/reg copy operations. */
507 static void
508 compute_can_copy (void)
510 int i;
511 #ifndef AVOID_CCMODE_COPIES
512 rtx reg, insn;
513 #endif
514 memset (can_copy, 0, NUM_MACHINE_MODES);
516 start_sequence ();
517 for (i = 0; i < NUM_MACHINE_MODES; i++)
518 if (GET_MODE_CLASS (i) == MODE_CC)
520 #ifdef AVOID_CCMODE_COPIES
521 can_copy[i] = 0;
522 #else
523 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
524 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
525 if (recog (PATTERN (insn), insn, NULL) >= 0)
526 can_copy[i] = 1;
527 #endif
529 else
530 can_copy[i] = 1;
532 end_sequence ();
535 /* Returns whether the mode supports reg/reg copy operations. */
537 bool
538 can_copy_p (enum machine_mode mode)
540 if (! can_copy_init_p)
542 compute_can_copy ();
543 can_copy_init_p = true;
546 return can_copy[mode] != 0;
549 /* Cover function to xmalloc to record bytes allocated. */
551 static void *
552 gmalloc (size_t size)
554 bytes_used += size;
555 return xmalloc (size);
558 /* Cover function to xcalloc to record bytes allocated. */
560 static void *
561 gcalloc (size_t nelem, size_t elsize)
563 bytes_used += nelem * elsize;
564 return xcalloc (nelem, elsize);
567 /* Cover function to obstack_alloc. */
569 static void *
570 gcse_alloc (unsigned long size)
572 bytes_used += size;
573 return obstack_alloc (&gcse_obstack, size);
576 /* Allocate memory for the reg/memory set tracking tables.
577 This is called at the start of each pass. */
579 static void
580 alloc_gcse_mem (void)
582 /* Allocate vars to track sets of regs. */
583 reg_set_bitmap = ALLOC_REG_SET (NULL);
585 /* Allocate array to keep a list of insns which modify memory in each
586 basic block. */
587 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
588 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
589 last_basic_block);
590 modify_mem_list_set = BITMAP_ALLOC (NULL);
591 blocks_with_calls = BITMAP_ALLOC (NULL);
594 /* Free memory allocated by alloc_gcse_mem. */
596 static void
597 free_gcse_mem (void)
599 FREE_REG_SET (reg_set_bitmap);
601 free_modify_mem_tables ();
602 BITMAP_FREE (modify_mem_list_set);
603 BITMAP_FREE (blocks_with_calls);
606 /* Compute the local properties of each recorded expression.
608 Local properties are those that are defined by the block, irrespective of
609 other blocks.
611 An expression is transparent in a block if its operands are not modified
612 in the block.
614 An expression is computed (locally available) in a block if it is computed
615 at least once and expression would contain the same value if the
616 computation was moved to the end of the block.
618 An expression is locally anticipatable in a block if it is computed at
619 least once and expression would contain the same value if the computation
620 was moved to the beginning of the block.
622 We call this routine for pre and code hoisting. They all compute
623 basically the same information and thus can easily share this code.
625 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
626 properties. If NULL, then it is not necessary to compute or record that
627 particular property.
629 TABLE controls which hash table to look at. */
631 static void
632 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
633 struct hash_table_d *table)
635 unsigned int i;
637 /* Initialize any bitmaps that were passed in. */
638 if (transp)
640 sbitmap_vector_ones (transp, last_basic_block);
643 if (comp)
644 sbitmap_vector_zero (comp, last_basic_block);
645 if (antloc)
646 sbitmap_vector_zero (antloc, last_basic_block);
648 for (i = 0; i < table->size; i++)
650 struct expr *expr;
652 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
654 int indx = expr->bitmap_index;
655 struct occr *occr;
657 /* The expression is transparent in this block if it is not killed.
658 We start by assuming all are transparent [none are killed], and
659 then reset the bits for those that are. */
660 if (transp)
661 compute_transp (expr->expr, indx, transp);
663 /* The occurrences recorded in antic_occr are exactly those that
664 we want to set to nonzero in ANTLOC. */
665 if (antloc)
666 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
668 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
670 /* While we're scanning the table, this is a good place to
671 initialize this. */
672 occr->deleted_p = 0;
675 /* The occurrences recorded in avail_occr are exactly those that
676 we want to set to nonzero in COMP. */
677 if (comp)
678 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
680 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
682 /* While we're scanning the table, this is a good place to
683 initialize this. */
684 occr->copied_p = 0;
687 /* While we're scanning the table, this is a good place to
688 initialize this. */
689 expr->reaching_reg = 0;
694 /* Hash table support. */
696 struct reg_avail_info
698 basic_block last_bb;
699 int first_set;
700 int last_set;
703 static struct reg_avail_info *reg_avail_info;
704 static basic_block current_bb;
706 /* See whether X, the source of a set, is something we want to consider for
707 GCSE. */
709 static int
710 want_to_gcse_p (rtx x, int *max_distance_ptr)
712 #ifdef STACK_REGS
713 /* On register stack architectures, don't GCSE constants from the
714 constant pool, as the benefits are often swamped by the overhead
715 of shuffling the register stack between basic blocks. */
716 if (IS_STACK_MODE (GET_MODE (x)))
717 x = avoid_constant_pool_reference (x);
718 #endif
720 /* GCSE'ing constants:
722 We do not specifically distinguish between constant and non-constant
723 expressions in PRE and Hoist. We use set_src_cost below to limit
724 the maximum distance simple expressions can travel.
726 Nevertheless, constants are much easier to GCSE, and, hence,
727 it is easy to overdo the optimizations. Usually, excessive PRE and
728 Hoisting of constant leads to increased register pressure.
730 RA can deal with this by rematerialing some of the constants.
731 Therefore, it is important that the back-end generates sets of constants
732 in a way that allows reload rematerialize them under high register
733 pressure, i.e., a pseudo register with REG_EQUAL to constant
734 is set only once. Failing to do so will result in IRA/reload
735 spilling such constants under high register pressure instead of
736 rematerializing them. */
738 switch (GET_CODE (x))
740 case REG:
741 case SUBREG:
742 case CALL:
743 return 0;
745 CASE_CONST_ANY:
746 if (!doing_code_hoisting_p)
747 /* Do not PRE constants. */
748 return 0;
750 /* FALLTHRU */
752 default:
753 if (doing_code_hoisting_p)
754 /* PRE doesn't implement max_distance restriction. */
756 int cost;
757 int max_distance;
759 gcc_assert (!optimize_function_for_speed_p (cfun)
760 && optimize_function_for_size_p (cfun));
761 cost = set_src_cost (x, 0);
763 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
765 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
766 if (max_distance == 0)
767 return 0;
769 gcc_assert (max_distance > 0);
771 else
772 max_distance = 0;
774 if (max_distance_ptr)
775 *max_distance_ptr = max_distance;
778 return can_assign_to_reg_without_clobbers_p (x);
782 /* Used internally by can_assign_to_reg_without_clobbers_p. */
784 static GTY(()) rtx test_insn;
786 /* Return true if we can assign X to a pseudo register such that the
787 resulting insn does not result in clobbering a hard register as a
788 side-effect.
790 Additionally, if the target requires it, check that the resulting insn
791 can be copied. If it cannot, this means that X is special and probably
792 has hidden side-effects we don't want to mess with.
794 This function is typically used by code motion passes, to verify
795 that it is safe to insert an insn without worrying about clobbering
796 maybe live hard regs. */
798 bool
799 can_assign_to_reg_without_clobbers_p (rtx x)
801 int num_clobbers = 0;
802 int icode;
804 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
805 if (general_operand (x, GET_MODE (x)))
806 return 1;
807 else if (GET_MODE (x) == VOIDmode)
808 return 0;
810 /* Otherwise, check if we can make a valid insn from it. First initialize
811 our test insn if we haven't already. */
812 if (test_insn == 0)
814 test_insn
815 = make_insn_raw (gen_rtx_SET (VOIDmode,
816 gen_rtx_REG (word_mode,
817 FIRST_PSEUDO_REGISTER * 2),
818 const0_rtx));
819 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
822 /* Now make an insn like the one we would make when GCSE'ing and see if
823 valid. */
824 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
825 SET_SRC (PATTERN (test_insn)) = x;
827 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
828 if (icode < 0)
829 return false;
831 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
832 return false;
834 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
835 return false;
837 return true;
840 /* Return nonzero if the operands of expression X are unchanged from the
841 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
842 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
844 static int
845 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
847 int i, j;
848 enum rtx_code code;
849 const char *fmt;
851 if (x == 0)
852 return 1;
854 code = GET_CODE (x);
855 switch (code)
857 case REG:
859 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
861 if (info->last_bb != current_bb)
862 return 1;
863 if (avail_p)
864 return info->last_set < DF_INSN_LUID (insn);
865 else
866 return info->first_set >= DF_INSN_LUID (insn);
869 case MEM:
870 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
871 x, avail_p))
872 return 0;
873 else
874 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
876 case PRE_DEC:
877 case PRE_INC:
878 case POST_DEC:
879 case POST_INC:
880 case PRE_MODIFY:
881 case POST_MODIFY:
882 return 0;
884 case PC:
885 case CC0: /*FIXME*/
886 case CONST:
887 CASE_CONST_ANY:
888 case SYMBOL_REF:
889 case LABEL_REF:
890 case ADDR_VEC:
891 case ADDR_DIFF_VEC:
892 return 1;
894 default:
895 break;
898 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
900 if (fmt[i] == 'e')
902 /* If we are about to do the last recursive call needed at this
903 level, change it into iteration. This function is called enough
904 to be worth it. */
905 if (i == 0)
906 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
908 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
909 return 0;
911 else if (fmt[i] == 'E')
912 for (j = 0; j < XVECLEN (x, i); j++)
913 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
914 return 0;
917 return 1;
920 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
922 struct mem_conflict_info
924 /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
925 see if a memory store conflicts with this memory load. */
926 const_rtx mem;
928 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
929 references. */
930 bool conflict;
933 /* DEST is the output of an instruction. If it is a memory reference and
934 possibly conflicts with the load found in DATA, then communicate this
935 information back through DATA. */
937 static void
938 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
939 void *data)
941 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
943 while (GET_CODE (dest) == SUBREG
944 || GET_CODE (dest) == ZERO_EXTRACT
945 || GET_CODE (dest) == STRICT_LOW_PART)
946 dest = XEXP (dest, 0);
948 /* If DEST is not a MEM, then it will not conflict with the load. Note
949 that function calls are assumed to clobber memory, but are handled
950 elsewhere. */
951 if (! MEM_P (dest))
952 return;
954 /* If we are setting a MEM in our list of specially recognized MEMs,
955 don't mark as killed this time. */
956 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
958 if (!find_rtx_in_ldst (dest))
959 mci->conflict = true;
960 return;
963 if (true_dependence (dest, GET_MODE (dest), mci->mem))
964 mci->conflict = true;
967 /* Return nonzero if the expression in X (a memory reference) is killed
968 in block BB before or after the insn with the LUID in UID_LIMIT.
969 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
970 before UID_LIMIT.
972 To check the entire block, set UID_LIMIT to max_uid + 1 and
973 AVAIL_P to 0. */
975 static int
976 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
977 int avail_p)
979 VEC (rtx,heap) *list = modify_mem_list[bb->index];
980 rtx setter;
981 unsigned ix;
983 /* If this is a readonly then we aren't going to be changing it. */
984 if (MEM_READONLY_P (x))
985 return 0;
987 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
989 struct mem_conflict_info mci;
991 /* Ignore entries in the list that do not apply. */
992 if ((avail_p
993 && DF_INSN_LUID (setter) < uid_limit)
994 || (! avail_p
995 && DF_INSN_LUID (setter) > uid_limit))
996 continue;
998 /* If SETTER is a call everything is clobbered. Note that calls
999 to pure functions are never put on the list, so we need not
1000 worry about them. */
1001 if (CALL_P (setter))
1002 return 1;
1004 /* SETTER must be an INSN of some kind that sets memory. Call
1005 note_stores to examine each hunk of memory that is modified. */
1006 mci.mem = x;
1007 mci.conflict = false;
1008 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1009 if (mci.conflict)
1010 return 1;
1012 return 0;
1015 /* Return nonzero if the operands of expression X are unchanged from
1016 the start of INSN's basic block up to but not including INSN. */
1018 static int
1019 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1021 return oprs_unchanged_p (x, insn, 0);
1024 /* Return nonzero if the operands of expression X are unchanged from
1025 INSN to the end of INSN's basic block. */
1027 static int
1028 oprs_available_p (const_rtx x, const_rtx insn)
1030 return oprs_unchanged_p (x, insn, 1);
1033 /* Hash expression X.
1035 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1036 indicating if a volatile operand is found or if the expression contains
1037 something we don't want to insert in the table. HASH_TABLE_SIZE is
1038 the current size of the hash table to be probed. */
1040 static unsigned int
1041 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1042 int hash_table_size)
1044 unsigned int hash;
1046 *do_not_record_p = 0;
1048 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1049 return hash % hash_table_size;
1052 /* Return nonzero if exp1 is equivalent to exp2. */
1054 static int
1055 expr_equiv_p (const_rtx x, const_rtx y)
1057 return exp_equiv_p (x, y, 0, true);
1060 /* Insert expression X in INSN in the hash TABLE.
1061 If it is already present, record it as the last occurrence in INSN's
1062 basic block.
1064 MODE is the mode of the value X is being stored into.
1065 It is only used if X is a CONST_INT.
1067 ANTIC_P is nonzero if X is an anticipatable expression.
1068 AVAIL_P is nonzero if X is an available expression.
1070 MAX_DISTANCE is the maximum distance in instructions this expression can
1071 be moved. */
1073 static void
1074 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1075 int avail_p, int max_distance, struct hash_table_d *table)
1077 int found, do_not_record_p;
1078 unsigned int hash;
1079 struct expr *cur_expr, *last_expr = NULL;
1080 struct occr *antic_occr, *avail_occr;
1082 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1084 /* Do not insert expression in table if it contains volatile operands,
1085 or if hash_expr determines the expression is something we don't want
1086 to or can't handle. */
1087 if (do_not_record_p)
1088 return;
1090 cur_expr = table->table[hash];
1091 found = 0;
1093 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1095 /* If the expression isn't found, save a pointer to the end of
1096 the list. */
1097 last_expr = cur_expr;
1098 cur_expr = cur_expr->next_same_hash;
1101 if (! found)
1103 cur_expr = GOBNEW (struct expr);
1104 bytes_used += sizeof (struct expr);
1105 if (table->table[hash] == NULL)
1106 /* This is the first pattern that hashed to this index. */
1107 table->table[hash] = cur_expr;
1108 else
1109 /* Add EXPR to end of this hash chain. */
1110 last_expr->next_same_hash = cur_expr;
1112 /* Set the fields of the expr element. */
1113 cur_expr->expr = x;
1114 cur_expr->bitmap_index = table->n_elems++;
1115 cur_expr->next_same_hash = NULL;
1116 cur_expr->antic_occr = NULL;
1117 cur_expr->avail_occr = NULL;
1118 gcc_assert (max_distance >= 0);
1119 cur_expr->max_distance = max_distance;
1121 else
1122 gcc_assert (cur_expr->max_distance == max_distance);
1124 /* Now record the occurrence(s). */
1125 if (antic_p)
1127 antic_occr = cur_expr->antic_occr;
1129 if (antic_occr
1130 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1131 antic_occr = NULL;
1133 if (antic_occr)
1134 /* Found another instance of the expression in the same basic block.
1135 Prefer the currently recorded one. We want the first one in the
1136 block and the block is scanned from start to end. */
1137 ; /* nothing to do */
1138 else
1140 /* First occurrence of this expression in this basic block. */
1141 antic_occr = GOBNEW (struct occr);
1142 bytes_used += sizeof (struct occr);
1143 antic_occr->insn = insn;
1144 antic_occr->next = cur_expr->antic_occr;
1145 antic_occr->deleted_p = 0;
1146 cur_expr->antic_occr = antic_occr;
1150 if (avail_p)
1152 avail_occr = cur_expr->avail_occr;
1154 if (avail_occr
1155 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1157 /* Found another instance of the expression in the same basic block.
1158 Prefer this occurrence to the currently recorded one. We want
1159 the last one in the block and the block is scanned from start
1160 to end. */
1161 avail_occr->insn = insn;
1163 else
1165 /* First occurrence of this expression in this basic block. */
1166 avail_occr = GOBNEW (struct occr);
1167 bytes_used += sizeof (struct occr);
1168 avail_occr->insn = insn;
1169 avail_occr->next = cur_expr->avail_occr;
1170 avail_occr->deleted_p = 0;
1171 cur_expr->avail_occr = avail_occr;
1176 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1178 static void
1179 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1181 rtx src = SET_SRC (set);
1182 rtx dest = SET_DEST (set);
1183 rtx note;
1185 if (GET_CODE (src) == CALL)
1186 hash_scan_call (src, insn, table);
1188 else if (REG_P (dest))
1190 unsigned int regno = REGNO (dest);
1191 int max_distance = 0;
1193 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1195 This allows us to do a single GCSE pass and still eliminate
1196 redundant constants, addresses or other expressions that are
1197 constructed with multiple instructions.
1199 However, keep the original SRC if INSN is a simple reg-reg move.
1200 In this case, there will almost always be a REG_EQUAL note on the
1201 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1202 for INSN, we miss copy propagation opportunities and we perform the
1203 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1204 do more than one PRE GCSE pass.
1206 Note that this does not impede profitable constant propagations. We
1207 "look through" reg-reg sets in lookup_avail_set. */
1208 note = find_reg_equal_equiv_note (insn);
1209 if (note != 0
1210 && REG_NOTE_KIND (note) == REG_EQUAL
1211 && !REG_P (src)
1212 && want_to_gcse_p (XEXP (note, 0), NULL))
1213 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1215 /* Only record sets of pseudo-regs in the hash table. */
1216 if (regno >= FIRST_PSEUDO_REGISTER
1217 /* Don't GCSE something if we can't do a reg/reg copy. */
1218 && can_copy_p (GET_MODE (dest))
1219 /* GCSE commonly inserts instruction after the insn. We can't
1220 do that easily for EH edges so disable GCSE on these for now. */
1221 /* ??? We can now easily create new EH landing pads at the
1222 gimple level, for splitting edges; there's no reason we
1223 can't do the same thing at the rtl level. */
1224 && !can_throw_internal (insn)
1225 /* Is SET_SRC something we want to gcse? */
1226 && want_to_gcse_p (src, &max_distance)
1227 /* Don't CSE a nop. */
1228 && ! set_noop_p (set)
1229 /* Don't GCSE if it has attached REG_EQUIV note.
1230 At this point this only function parameters should have
1231 REG_EQUIV notes and if the argument slot is used somewhere
1232 explicitly, it means address of parameter has been taken,
1233 so we should not extend the lifetime of the pseudo. */
1234 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1236 /* An expression is not anticipatable if its operands are
1237 modified before this insn or if this is not the only SET in
1238 this insn. The latter condition does not have to mean that
1239 SRC itself is not anticipatable, but we just will not be
1240 able to handle code motion of insns with multiple sets. */
1241 int antic_p = oprs_anticipatable_p (src, insn)
1242 && !multiple_sets (insn);
1243 /* An expression is not available if its operands are
1244 subsequently modified, including this insn. It's also not
1245 available if this is a branch, because we can't insert
1246 a set after the branch. */
1247 int avail_p = (oprs_available_p (src, insn)
1248 && ! JUMP_P (insn));
1250 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1251 max_distance, table);
1254 /* In case of store we want to consider the memory value as available in
1255 the REG stored in that memory. This makes it possible to remove
1256 redundant loads from due to stores to the same location. */
1257 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1259 unsigned int regno = REGNO (src);
1260 int max_distance = 0;
1262 /* Only record sets of pseudo-regs in the hash table. */
1263 if (regno >= FIRST_PSEUDO_REGISTER
1264 /* Don't GCSE something if we can't do a reg/reg copy. */
1265 && can_copy_p (GET_MODE (src))
1266 /* GCSE commonly inserts instruction after the insn. We can't
1267 do that easily for EH edges so disable GCSE on these for now. */
1268 && !can_throw_internal (insn)
1269 /* Is SET_DEST something we want to gcse? */
1270 && want_to_gcse_p (dest, &max_distance)
1271 /* Don't CSE a nop. */
1272 && ! set_noop_p (set)
1273 /* Don't GCSE if it has attached REG_EQUIV note.
1274 At this point this only function parameters should have
1275 REG_EQUIV notes and if the argument slot is used somewhere
1276 explicitly, it means address of parameter has been taken,
1277 so we should not extend the lifetime of the pseudo. */
1278 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1279 || ! MEM_P (XEXP (note, 0))))
1281 /* Stores are never anticipatable. */
1282 int antic_p = 0;
1283 /* An expression is not available if its operands are
1284 subsequently modified, including this insn. It's also not
1285 available if this is a branch, because we can't insert
1286 a set after the branch. */
1287 int avail_p = oprs_available_p (dest, insn)
1288 && ! JUMP_P (insn);
1290 /* Record the memory expression (DEST) in the hash table. */
1291 insert_expr_in_table (dest, GET_MODE (dest), insn,
1292 antic_p, avail_p, max_distance, table);
1297 static void
1298 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1299 struct hash_table_d *table ATTRIBUTE_UNUSED)
1301 /* Currently nothing to do. */
1304 static void
1305 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1306 struct hash_table_d *table ATTRIBUTE_UNUSED)
1308 /* Currently nothing to do. */
1311 /* Process INSN and add hash table entries as appropriate. */
1313 static void
1314 hash_scan_insn (rtx insn, struct hash_table_d *table)
1316 rtx pat = PATTERN (insn);
1317 int i;
1319 /* Pick out the sets of INSN and for other forms of instructions record
1320 what's been modified. */
1322 if (GET_CODE (pat) == SET)
1323 hash_scan_set (pat, insn, table);
1325 else if (GET_CODE (pat) == CLOBBER)
1326 hash_scan_clobber (pat, insn, table);
1328 else if (GET_CODE (pat) == CALL)
1329 hash_scan_call (pat, insn, table);
1331 else if (GET_CODE (pat) == PARALLEL)
1332 for (i = 0; i < XVECLEN (pat, 0); i++)
1334 rtx x = XVECEXP (pat, 0, i);
1336 if (GET_CODE (x) == SET)
1337 hash_scan_set (x, insn, table);
1338 else if (GET_CODE (x) == CLOBBER)
1339 hash_scan_clobber (x, insn, table);
1340 else if (GET_CODE (x) == CALL)
1341 hash_scan_call (x, insn, table);
1345 /* Dump the hash table TABLE to file FILE under the name NAME. */
1347 static void
1348 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1350 int i;
1351 /* Flattened out table, so it's printed in proper order. */
1352 struct expr **flat_table;
1353 unsigned int *hash_val;
1354 struct expr *expr;
1356 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1357 hash_val = XNEWVEC (unsigned int, table->n_elems);
1359 for (i = 0; i < (int) table->size; i++)
1360 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1362 flat_table[expr->bitmap_index] = expr;
1363 hash_val[expr->bitmap_index] = i;
1366 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1367 name, table->size, table->n_elems);
1369 for (i = 0; i < (int) table->n_elems; i++)
1370 if (flat_table[i] != 0)
1372 expr = flat_table[i];
1373 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1374 expr->bitmap_index, hash_val[i], expr->max_distance);
1375 print_rtl (file, expr->expr);
1376 fprintf (file, "\n");
1379 fprintf (file, "\n");
1381 free (flat_table);
1382 free (hash_val);
1385 /* Record register first/last/block set information for REGNO in INSN.
1387 first_set records the first place in the block where the register
1388 is set and is used to compute "anticipatability".
1390 last_set records the last place in the block where the register
1391 is set and is used to compute "availability".
1393 last_bb records the block for which first_set and last_set are
1394 valid, as a quick test to invalidate them. */
1396 static void
1397 record_last_reg_set_info (rtx insn, int regno)
1399 struct reg_avail_info *info = &reg_avail_info[regno];
1400 int luid = DF_INSN_LUID (insn);
1402 info->last_set = luid;
1403 if (info->last_bb != current_bb)
1405 info->last_bb = current_bb;
1406 info->first_set = luid;
1410 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1411 Note we store a pair of elements in the list, so they have to be
1412 taken off pairwise. */
1414 static void
1415 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1416 void * v_insn)
1418 rtx dest_addr, insn;
1419 int bb;
1420 modify_pair pair;
1422 while (GET_CODE (dest) == SUBREG
1423 || GET_CODE (dest) == ZERO_EXTRACT
1424 || GET_CODE (dest) == STRICT_LOW_PART)
1425 dest = XEXP (dest, 0);
1427 /* If DEST is not a MEM, then it will not conflict with a load. Note
1428 that function calls are assumed to clobber memory, but are handled
1429 elsewhere. */
1431 if (! MEM_P (dest))
1432 return;
1434 dest_addr = get_addr (XEXP (dest, 0));
1435 dest_addr = canon_rtx (dest_addr);
1436 insn = (rtx) v_insn;
1437 bb = BLOCK_FOR_INSN (insn)->index;
1439 pair.dest = dest;
1440 pair.dest_addr = dest_addr;
1441 VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], pair);
1444 /* Record memory modification information for INSN. We do not actually care
1445 about the memory location(s) that are set, or even how they are set (consider
1446 a CALL_INSN). We merely need to record which insns modify memory. */
1448 static void
1449 record_last_mem_set_info (rtx insn)
1451 int bb = BLOCK_FOR_INSN (insn)->index;
1453 /* load_killed_in_block_p will handle the case of calls clobbering
1454 everything. */
1455 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1456 bitmap_set_bit (modify_mem_list_set, bb);
1458 if (CALL_P (insn))
1459 bitmap_set_bit (blocks_with_calls, bb);
1460 else
1461 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1464 /* Called from compute_hash_table via note_stores to handle one
1465 SET or CLOBBER in an insn. DATA is really the instruction in which
1466 the SET is taking place. */
1468 static void
1469 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1471 rtx last_set_insn = (rtx) data;
1473 if (GET_CODE (dest) == SUBREG)
1474 dest = SUBREG_REG (dest);
1476 if (REG_P (dest))
1477 record_last_reg_set_info (last_set_insn, REGNO (dest));
1478 else if (MEM_P (dest)
1479 /* Ignore pushes, they clobber nothing. */
1480 && ! push_operand (dest, GET_MODE (dest)))
1481 record_last_mem_set_info (last_set_insn);
1484 /* Top level function to create an expression hash table.
1486 Expression entries are placed in the hash table if
1487 - they are of the form (set (pseudo-reg) src),
1488 - src is something we want to perform GCSE on,
1489 - none of the operands are subsequently modified in the block
1491 Currently src must be a pseudo-reg or a const_int.
1493 TABLE is the table computed. */
1495 static void
1496 compute_hash_table_work (struct hash_table_d *table)
1498 int i;
1500 /* re-Cache any INSN_LIST nodes we have allocated. */
1501 clear_modify_mem_tables ();
1502 /* Some working arrays used to track first and last set in each block. */
1503 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1505 for (i = 0; i < max_reg_num (); ++i)
1506 reg_avail_info[i].last_bb = NULL;
1508 FOR_EACH_BB (current_bb)
1510 rtx insn;
1511 unsigned int regno;
1513 /* First pass over the instructions records information used to
1514 determine when registers and memory are first and last set. */
1515 FOR_BB_INSNS (current_bb, insn)
1517 if (!NONDEBUG_INSN_P (insn))
1518 continue;
1520 if (CALL_P (insn))
1522 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1523 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1524 record_last_reg_set_info (insn, regno);
1526 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1527 record_last_mem_set_info (insn);
1530 note_stores (PATTERN (insn), record_last_set_info, insn);
1533 /* The next pass builds the hash table. */
1534 FOR_BB_INSNS (current_bb, insn)
1535 if (NONDEBUG_INSN_P (insn))
1536 hash_scan_insn (insn, table);
1539 free (reg_avail_info);
1540 reg_avail_info = NULL;
1543 /* Allocate space for the set/expr hash TABLE.
1544 It is used to determine the number of buckets to use. */
1546 static void
1547 alloc_hash_table (struct hash_table_d *table)
1549 int n;
1551 n = get_max_insn_count ();
1553 table->size = n / 4;
1554 if (table->size < 11)
1555 table->size = 11;
1557 /* Attempt to maintain efficient use of hash table.
1558 Making it an odd number is simplest for now.
1559 ??? Later take some measurements. */
1560 table->size |= 1;
1561 n = table->size * sizeof (struct expr *);
1562 table->table = GNEWVAR (struct expr *, n);
1565 /* Free things allocated by alloc_hash_table. */
1567 static void
1568 free_hash_table (struct hash_table_d *table)
1570 free (table->table);
1573 /* Compute the expression hash table TABLE. */
1575 static void
1576 compute_hash_table (struct hash_table_d *table)
1578 /* Initialize count of number of entries in hash table. */
1579 table->n_elems = 0;
1580 memset (table->table, 0, table->size * sizeof (struct expr *));
1582 compute_hash_table_work (table);
1585 /* Expression tracking support. */
1587 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1588 static void
1589 clear_modify_mem_tables (void)
1591 unsigned i;
1592 bitmap_iterator bi;
1594 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1596 VEC_free (rtx, heap, modify_mem_list[i]);
1597 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1599 bitmap_clear (modify_mem_list_set);
1600 bitmap_clear (blocks_with_calls);
1603 /* Release memory used by modify_mem_list_set. */
1605 static void
1606 free_modify_mem_tables (void)
1608 clear_modify_mem_tables ();
1609 free (modify_mem_list);
1610 free (canon_modify_mem_list);
1611 modify_mem_list = 0;
1612 canon_modify_mem_list = 0;
1615 /* For each block, compute whether X is transparent. X is either an
1616 expression or an assignment [though we don't care which, for this context
1617 an assignment is treated as an expression]. For each block where an
1618 element of X is modified, reset the INDX bit in BMAP. */
1620 static void
1621 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1623 int i, j;
1624 enum rtx_code code;
1625 const char *fmt;
1627 /* repeat is used to turn tail-recursion into iteration since GCC
1628 can't do it when there's no return value. */
1629 repeat:
1631 if (x == 0)
1632 return;
1634 code = GET_CODE (x);
1635 switch (code)
1637 case REG:
1639 df_ref def;
1640 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1641 def;
1642 def = DF_REF_NEXT_REG (def))
1643 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1646 return;
1648 case MEM:
1649 if (! MEM_READONLY_P (x))
1651 bitmap_iterator bi;
1652 unsigned bb_index;
1654 /* First handle all the blocks with calls. We don't need to
1655 do any list walking for them. */
1656 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1658 RESET_BIT (bmap[bb_index], indx);
1661 /* Now iterate over the blocks which have memory modifications
1662 but which do not have any calls. */
1663 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1664 blocks_with_calls,
1665 0, bb_index, bi)
1667 VEC (modify_pair,heap) *list
1668 = canon_modify_mem_list[bb_index];
1669 modify_pair *pair;
1670 unsigned ix;
1672 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1674 rtx dest = pair->dest;
1675 rtx dest_addr = pair->dest_addr;
1677 if (canon_true_dependence (dest, GET_MODE (dest),
1678 dest_addr, x, NULL_RTX))
1679 RESET_BIT (bmap[bb_index], indx);
1684 x = XEXP (x, 0);
1685 goto repeat;
1687 case PC:
1688 case CC0: /*FIXME*/
1689 case CONST:
1690 CASE_CONST_ANY:
1691 case SYMBOL_REF:
1692 case LABEL_REF:
1693 case ADDR_VEC:
1694 case ADDR_DIFF_VEC:
1695 return;
1697 default:
1698 break;
1701 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1703 if (fmt[i] == 'e')
1705 /* If we are about to do the last recursive call
1706 needed at this level, change it into iteration.
1707 This function is called enough to be worth it. */
1708 if (i == 0)
1710 x = XEXP (x, i);
1711 goto repeat;
1714 compute_transp (XEXP (x, i), indx, bmap);
1716 else if (fmt[i] == 'E')
1717 for (j = 0; j < XVECLEN (x, i); j++)
1718 compute_transp (XVECEXP (x, i, j), indx, bmap);
1722 /* Compute PRE+LCM working variables. */
1724 /* Local properties of expressions. */
1726 /* Nonzero for expressions that are transparent in the block. */
1727 static sbitmap *transp;
1729 /* Nonzero for expressions that are computed (available) in the block. */
1730 static sbitmap *comp;
1732 /* Nonzero for expressions that are locally anticipatable in the block. */
1733 static sbitmap *antloc;
1735 /* Nonzero for expressions where this block is an optimal computation
1736 point. */
1737 static sbitmap *pre_optimal;
1739 /* Nonzero for expressions which are redundant in a particular block. */
1740 static sbitmap *pre_redundant;
1742 /* Nonzero for expressions which should be inserted on a specific edge. */
1743 static sbitmap *pre_insert_map;
1745 /* Nonzero for expressions which should be deleted in a specific block. */
1746 static sbitmap *pre_delete_map;
1748 /* Allocate vars used for PRE analysis. */
1750 static void
1751 alloc_pre_mem (int n_blocks, int n_exprs)
1753 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1754 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1755 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1757 pre_optimal = NULL;
1758 pre_redundant = NULL;
1759 pre_insert_map = NULL;
1760 pre_delete_map = NULL;
1761 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1763 /* pre_insert and pre_delete are allocated later. */
1766 /* Free vars used for PRE analysis. */
1768 static void
1769 free_pre_mem (void)
1771 sbitmap_vector_free (transp);
1772 sbitmap_vector_free (comp);
1774 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1776 if (pre_optimal)
1777 sbitmap_vector_free (pre_optimal);
1778 if (pre_redundant)
1779 sbitmap_vector_free (pre_redundant);
1780 if (pre_insert_map)
1781 sbitmap_vector_free (pre_insert_map);
1782 if (pre_delete_map)
1783 sbitmap_vector_free (pre_delete_map);
1785 transp = comp = NULL;
1786 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1789 /* Remove certain expressions from anticipatable and transparent
1790 sets of basic blocks that have incoming abnormal edge.
1791 For PRE remove potentially trapping expressions to avoid placing
1792 them on abnormal edges. For hoisting remove memory references that
1793 can be clobbered by calls. */
1795 static void
1796 prune_expressions (bool pre_p)
1798 sbitmap prune_exprs;
1799 struct expr *expr;
1800 unsigned int ui;
1801 basic_block bb;
1803 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1804 sbitmap_zero (prune_exprs);
1805 for (ui = 0; ui < expr_hash_table.size; ui++)
1807 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1809 /* Note potentially trapping expressions. */
1810 if (may_trap_p (expr->expr))
1812 SET_BIT (prune_exprs, expr->bitmap_index);
1813 continue;
1816 if (!pre_p && MEM_P (expr->expr))
1817 /* Note memory references that can be clobbered by a call.
1818 We do not split abnormal edges in hoisting, so would
1819 a memory reference get hoisted along an abnormal edge,
1820 it would be placed /before/ the call. Therefore, only
1821 constant memory references can be hoisted along abnormal
1822 edges. */
1824 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1825 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1826 continue;
1828 if (MEM_READONLY_P (expr->expr)
1829 && !MEM_VOLATILE_P (expr->expr)
1830 && MEM_NOTRAP_P (expr->expr))
1831 /* Constant memory reference, e.g., a PIC address. */
1832 continue;
1834 /* ??? Optimally, we would use interprocedural alias
1835 analysis to determine if this mem is actually killed
1836 by this call. */
1838 SET_BIT (prune_exprs, expr->bitmap_index);
1843 FOR_EACH_BB (bb)
1845 edge e;
1846 edge_iterator ei;
1848 /* If the current block is the destination of an abnormal edge, we
1849 kill all trapping (for PRE) and memory (for hoist) expressions
1850 because we won't be able to properly place the instruction on
1851 the edge. So make them neither anticipatable nor transparent.
1852 This is fairly conservative.
1854 ??? For hoisting it may be necessary to check for set-and-jump
1855 instructions here, not just for abnormal edges. The general problem
1856 is that when an expression cannot not be placed right at the end of
1857 a basic block we should account for any side-effects of a subsequent
1858 jump instructions that could clobber the expression. It would
1859 be best to implement this check along the lines of
1860 hoist_expr_reaches_here_p where the target block is already known
1861 and, hence, there's no need to conservatively prune expressions on
1862 "intermediate" set-and-jump instructions. */
1863 FOR_EACH_EDGE (e, ei, bb->preds)
1864 if ((e->flags & EDGE_ABNORMAL)
1865 && (pre_p || CALL_P (BB_END (e->src))))
1867 sbitmap_difference (antloc[bb->index],
1868 antloc[bb->index], prune_exprs);
1869 sbitmap_difference (transp[bb->index],
1870 transp[bb->index], prune_exprs);
1871 break;
1875 sbitmap_free (prune_exprs);
1878 /* It may be necessary to insert a large number of insns on edges to
1879 make the existing occurrences of expressions fully redundant. This
1880 routine examines the set of insertions and deletions and if the ratio
1881 of insertions to deletions is too high for a particular expression, then
1882 the expression is removed from the insertion/deletion sets.
1884 N_ELEMS is the number of elements in the hash table. */
1886 static void
1887 prune_insertions_deletions (int n_elems)
1889 sbitmap_iterator sbi;
1890 sbitmap prune_exprs;
1892 /* We always use I to iterate over blocks/edges and J to iterate over
1893 expressions. */
1894 unsigned int i, j;
1896 /* Counts for the number of times an expression needs to be inserted and
1897 number of times an expression can be removed as a result. */
1898 int *insertions = GCNEWVEC (int, n_elems);
1899 int *deletions = GCNEWVEC (int, n_elems);
1901 /* Set of expressions which require too many insertions relative to
1902 the number of deletions achieved. We will prune these out of the
1903 insertion/deletion sets. */
1904 prune_exprs = sbitmap_alloc (n_elems);
1905 sbitmap_zero (prune_exprs);
1907 /* Iterate over the edges counting the number of times each expression
1908 needs to be inserted. */
1909 for (i = 0; i < (unsigned) n_edges; i++)
1911 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1912 insertions[j]++;
1915 /* Similarly for deletions, but those occur in blocks rather than on
1916 edges. */
1917 for (i = 0; i < (unsigned) last_basic_block; i++)
1919 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1920 deletions[j]++;
1923 /* Now that we have accurate counts, iterate over the elements in the
1924 hash table and see if any need too many insertions relative to the
1925 number of evaluations that can be removed. If so, mark them in
1926 PRUNE_EXPRS. */
1927 for (j = 0; j < (unsigned) n_elems; j++)
1928 if (deletions[j]
1929 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1930 SET_BIT (prune_exprs, j);
1932 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1933 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1935 for (i = 0; i < (unsigned) n_edges; i++)
1936 RESET_BIT (pre_insert_map[i], j);
1938 for (i = 0; i < (unsigned) last_basic_block; i++)
1939 RESET_BIT (pre_delete_map[i], j);
1942 sbitmap_free (prune_exprs);
1943 free (insertions);
1944 free (deletions);
1947 /* Top level routine to do the dataflow analysis needed by PRE. */
1949 static struct edge_list *
1950 compute_pre_data (void)
1952 struct edge_list *edge_list;
1953 basic_block bb;
1955 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1956 prune_expressions (true);
1957 sbitmap_vector_zero (ae_kill, last_basic_block);
1959 /* Compute ae_kill for each basic block using:
1961 ~(TRANSP | COMP)
1964 FOR_EACH_BB (bb)
1966 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1967 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1970 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1971 ae_kill, &pre_insert_map, &pre_delete_map);
1972 sbitmap_vector_free (antloc);
1973 antloc = NULL;
1974 sbitmap_vector_free (ae_kill);
1975 ae_kill = NULL;
1977 prune_insertions_deletions (expr_hash_table.n_elems);
1979 return edge_list;
1982 /* PRE utilities */
1984 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1985 block BB.
1987 VISITED is a pointer to a working buffer for tracking which BB's have
1988 been visited. It is NULL for the top-level call.
1990 We treat reaching expressions that go through blocks containing the same
1991 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
1992 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
1993 2 as not reaching. The intent is to improve the probability of finding
1994 only one reaching expression and to reduce register lifetimes by picking
1995 the closest such expression. */
1997 static int
1998 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
1999 basic_block bb, char *visited)
2001 edge pred;
2002 edge_iterator ei;
2004 FOR_EACH_EDGE (pred, ei, bb->preds)
2006 basic_block pred_bb = pred->src;
2008 if (pred->src == ENTRY_BLOCK_PTR
2009 /* Has predecessor has already been visited? */
2010 || visited[pred_bb->index])
2011 ;/* Nothing to do. */
2013 /* Does this predecessor generate this expression? */
2014 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2016 /* Is this the occurrence we're looking for?
2017 Note that there's only one generating occurrence per block
2018 so we just need to check the block number. */
2019 if (occr_bb == pred_bb)
2020 return 1;
2022 visited[pred_bb->index] = 1;
2024 /* Ignore this predecessor if it kills the expression. */
2025 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2026 visited[pred_bb->index] = 1;
2028 /* Neither gen nor kill. */
2029 else
2031 visited[pred_bb->index] = 1;
2032 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2033 return 1;
2037 /* All paths have been checked. */
2038 return 0;
2041 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2042 memory allocated for that function is returned. */
2044 static int
2045 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2047 int rval;
2048 char *visited = XCNEWVEC (char, last_basic_block);
2050 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2052 free (visited);
2053 return rval;
2056 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2058 static rtx
2059 process_insert_insn (struct expr *expr)
2061 rtx reg = expr->reaching_reg;
2062 /* Copy the expression to make sure we don't have any sharing issues. */
2063 rtx exp = copy_rtx (expr->expr);
2064 rtx pat;
2066 start_sequence ();
2068 /* If the expression is something that's an operand, like a constant,
2069 just copy it to a register. */
2070 if (general_operand (exp, GET_MODE (reg)))
2071 emit_move_insn (reg, exp);
2073 /* Otherwise, make a new insn to compute this expression and make sure the
2074 insn will be recognized (this also adds any needed CLOBBERs). */
2075 else
2077 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2079 if (insn_invalid_p (insn, false))
2080 gcc_unreachable ();
2083 pat = get_insns ();
2084 end_sequence ();
2086 return pat;
2089 /* Add EXPR to the end of basic block BB.
2091 This is used by both the PRE and code hoisting. */
2093 static void
2094 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2096 rtx insn = BB_END (bb);
2097 rtx new_insn;
2098 rtx reg = expr->reaching_reg;
2099 int regno = REGNO (reg);
2100 rtx pat, pat_end;
2102 pat = process_insert_insn (expr);
2103 gcc_assert (pat && INSN_P (pat));
2105 pat_end = pat;
2106 while (NEXT_INSN (pat_end) != NULL_RTX)
2107 pat_end = NEXT_INSN (pat_end);
2109 /* If the last insn is a jump, insert EXPR in front [taking care to
2110 handle cc0, etc. properly]. Similarly we need to care trapping
2111 instructions in presence of non-call exceptions. */
2113 if (JUMP_P (insn)
2114 || (NONJUMP_INSN_P (insn)
2115 && (!single_succ_p (bb)
2116 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2118 #ifdef HAVE_cc0
2119 rtx note;
2120 #endif
2122 /* If this is a jump table, then we can't insert stuff here. Since
2123 we know the previous real insn must be the tablejump, we insert
2124 the new instruction just before the tablejump. */
2125 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2126 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2127 insn = prev_active_insn (insn);
2129 #ifdef HAVE_cc0
2130 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2131 if cc0 isn't set. */
2132 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2133 if (note)
2134 insn = XEXP (note, 0);
2135 else
2137 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2138 if (maybe_cc0_setter
2139 && INSN_P (maybe_cc0_setter)
2140 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2141 insn = maybe_cc0_setter;
2143 #endif
2144 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2145 new_insn = emit_insn_before_noloc (pat, insn, bb);
2148 /* Likewise if the last insn is a call, as will happen in the presence
2149 of exception handling. */
2150 else if (CALL_P (insn)
2151 && (!single_succ_p (bb)
2152 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2154 /* Keeping in mind targets with small register classes and parameters
2155 in registers, we search backward and place the instructions before
2156 the first parameter is loaded. Do this for everyone for consistency
2157 and a presumption that we'll get better code elsewhere as well. */
2159 /* Since different machines initialize their parameter registers
2160 in different orders, assume nothing. Collect the set of all
2161 parameter registers. */
2162 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2164 /* If we found all the parameter loads, then we want to insert
2165 before the first parameter load.
2167 If we did not find all the parameter loads, then we might have
2168 stopped on the head of the block, which could be a CODE_LABEL.
2169 If we inserted before the CODE_LABEL, then we would be putting
2170 the insn in the wrong basic block. In that case, put the insn
2171 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2172 while (LABEL_P (insn)
2173 || NOTE_INSN_BASIC_BLOCK_P (insn))
2174 insn = NEXT_INSN (insn);
2176 new_insn = emit_insn_before_noloc (pat, insn, bb);
2178 else
2179 new_insn = emit_insn_after_noloc (pat, insn, bb);
2181 while (1)
2183 if (INSN_P (pat))
2184 add_label_notes (PATTERN (pat), new_insn);
2185 if (pat == pat_end)
2186 break;
2187 pat = NEXT_INSN (pat);
2190 gcse_create_count++;
2192 if (dump_file)
2194 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2195 bb->index, INSN_UID (new_insn));
2196 fprintf (dump_file, "copying expression %d to reg %d\n",
2197 expr->bitmap_index, regno);
2201 /* Insert partially redundant expressions on edges in the CFG to make
2202 the expressions fully redundant. */
2204 static int
2205 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2207 int e, i, j, num_edges, set_size, did_insert = 0;
2208 sbitmap *inserted;
2210 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2211 if it reaches any of the deleted expressions. */
2213 set_size = pre_insert_map[0]->size;
2214 num_edges = NUM_EDGES (edge_list);
2215 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2216 sbitmap_vector_zero (inserted, num_edges);
2218 for (e = 0; e < num_edges; e++)
2220 int indx;
2221 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2223 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2225 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2227 for (j = indx;
2228 insert && j < (int) expr_hash_table.n_elems;
2229 j++, insert >>= 1)
2230 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2232 struct expr *expr = index_map[j];
2233 struct occr *occr;
2235 /* Now look at each deleted occurrence of this expression. */
2236 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2238 if (! occr->deleted_p)
2239 continue;
2241 /* Insert this expression on this edge if it would
2242 reach the deleted occurrence in BB. */
2243 if (!TEST_BIT (inserted[e], j))
2245 rtx insn;
2246 edge eg = INDEX_EDGE (edge_list, e);
2248 /* We can't insert anything on an abnormal and
2249 critical edge, so we insert the insn at the end of
2250 the previous block. There are several alternatives
2251 detailed in Morgans book P277 (sec 10.5) for
2252 handling this situation. This one is easiest for
2253 now. */
2255 if (eg->flags & EDGE_ABNORMAL)
2256 insert_insn_end_basic_block (index_map[j], bb);
2257 else
2259 insn = process_insert_insn (index_map[j]);
2260 insert_insn_on_edge (insn, eg);
2263 if (dump_file)
2265 fprintf (dump_file, "PRE: edge (%d,%d), ",
2266 bb->index,
2267 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2268 fprintf (dump_file, "copy expression %d\n",
2269 expr->bitmap_index);
2272 update_ld_motion_stores (expr);
2273 SET_BIT (inserted[e], j);
2274 did_insert = 1;
2275 gcse_create_count++;
2282 sbitmap_vector_free (inserted);
2283 return did_insert;
2286 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2287 Given "old_reg <- expr" (INSN), instead of adding after it
2288 reaching_reg <- old_reg
2289 it's better to do the following:
2290 reaching_reg <- expr
2291 old_reg <- reaching_reg
2292 because this way copy propagation can discover additional PRE
2293 opportunities. But if this fails, we try the old way.
2294 When "expr" is a store, i.e.
2295 given "MEM <- old_reg", instead of adding after it
2296 reaching_reg <- old_reg
2297 it's better to add it before as follows:
2298 reaching_reg <- old_reg
2299 MEM <- reaching_reg. */
2301 static void
2302 pre_insert_copy_insn (struct expr *expr, rtx insn)
2304 rtx reg = expr->reaching_reg;
2305 int regno = REGNO (reg);
2306 int indx = expr->bitmap_index;
2307 rtx pat = PATTERN (insn);
2308 rtx set, first_set, new_insn;
2309 rtx old_reg;
2310 int i;
2312 /* This block matches the logic in hash_scan_insn. */
2313 switch (GET_CODE (pat))
2315 case SET:
2316 set = pat;
2317 break;
2319 case PARALLEL:
2320 /* Search through the parallel looking for the set whose
2321 source was the expression that we're interested in. */
2322 first_set = NULL_RTX;
2323 set = NULL_RTX;
2324 for (i = 0; i < XVECLEN (pat, 0); i++)
2326 rtx x = XVECEXP (pat, 0, i);
2327 if (GET_CODE (x) == SET)
2329 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2330 may not find an equivalent expression, but in this
2331 case the PARALLEL will have a single set. */
2332 if (first_set == NULL_RTX)
2333 first_set = x;
2334 if (expr_equiv_p (SET_SRC (x), expr->expr))
2336 set = x;
2337 break;
2342 gcc_assert (first_set);
2343 if (set == NULL_RTX)
2344 set = first_set;
2345 break;
2347 default:
2348 gcc_unreachable ();
2351 if (REG_P (SET_DEST (set)))
2353 old_reg = SET_DEST (set);
2354 /* Check if we can modify the set destination in the original insn. */
2355 if (validate_change (insn, &SET_DEST (set), reg, 0))
2357 new_insn = gen_move_insn (old_reg, reg);
2358 new_insn = emit_insn_after (new_insn, insn);
2360 else
2362 new_insn = gen_move_insn (reg, old_reg);
2363 new_insn = emit_insn_after (new_insn, insn);
2366 else /* This is possible only in case of a store to memory. */
2368 old_reg = SET_SRC (set);
2369 new_insn = gen_move_insn (reg, old_reg);
2371 /* Check if we can modify the set source in the original insn. */
2372 if (validate_change (insn, &SET_SRC (set), reg, 0))
2373 new_insn = emit_insn_before (new_insn, insn);
2374 else
2375 new_insn = emit_insn_after (new_insn, insn);
2378 gcse_create_count++;
2380 if (dump_file)
2381 fprintf (dump_file,
2382 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2383 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2384 INSN_UID (insn), regno);
2387 /* Copy available expressions that reach the redundant expression
2388 to `reaching_reg'. */
2390 static void
2391 pre_insert_copies (void)
2393 unsigned int i, added_copy;
2394 struct expr *expr;
2395 struct occr *occr;
2396 struct occr *avail;
2398 /* For each available expression in the table, copy the result to
2399 `reaching_reg' if the expression reaches a deleted one.
2401 ??? The current algorithm is rather brute force.
2402 Need to do some profiling. */
2404 for (i = 0; i < expr_hash_table.size; i++)
2405 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2407 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2408 we don't want to insert a copy here because the expression may not
2409 really be redundant. So only insert an insn if the expression was
2410 deleted. This test also avoids further processing if the
2411 expression wasn't deleted anywhere. */
2412 if (expr->reaching_reg == NULL)
2413 continue;
2415 /* Set when we add a copy for that expression. */
2416 added_copy = 0;
2418 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2420 if (! occr->deleted_p)
2421 continue;
2423 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2425 rtx insn = avail->insn;
2427 /* No need to handle this one if handled already. */
2428 if (avail->copied_p)
2429 continue;
2431 /* Don't handle this one if it's a redundant one. */
2432 if (INSN_DELETED_P (insn))
2433 continue;
2435 /* Or if the expression doesn't reach the deleted one. */
2436 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2437 expr,
2438 BLOCK_FOR_INSN (occr->insn)))
2439 continue;
2441 added_copy = 1;
2443 /* Copy the result of avail to reaching_reg. */
2444 pre_insert_copy_insn (expr, insn);
2445 avail->copied_p = 1;
2449 if (added_copy)
2450 update_ld_motion_stores (expr);
2454 /* Emit move from SRC to DEST noting the equivalence with expression computed
2455 in INSN. */
2457 static rtx
2458 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2460 rtx new_rtx;
2461 rtx set = single_set (insn), set2;
2462 rtx note;
2463 rtx eqv;
2465 /* This should never fail since we're creating a reg->reg copy
2466 we've verified to be valid. */
2468 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2470 /* Note the equivalence for local CSE pass. */
2471 set2 = single_set (new_rtx);
2472 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2473 return new_rtx;
2474 if ((note = find_reg_equal_equiv_note (insn)))
2475 eqv = XEXP (note, 0);
2476 else
2477 eqv = SET_SRC (set);
2479 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2481 return new_rtx;
2484 /* Delete redundant computations.
2485 Deletion is done by changing the insn to copy the `reaching_reg' of
2486 the expression into the result of the SET. It is left to later passes
2487 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2489 Return nonzero if a change is made. */
2491 static int
2492 pre_delete (void)
2494 unsigned int i;
2495 int changed;
2496 struct expr *expr;
2497 struct occr *occr;
2499 changed = 0;
2500 for (i = 0; i < expr_hash_table.size; i++)
2501 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2503 int indx = expr->bitmap_index;
2505 /* We only need to search antic_occr since we require ANTLOC != 0. */
2506 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2508 rtx insn = occr->insn;
2509 rtx set;
2510 basic_block bb = BLOCK_FOR_INSN (insn);
2512 /* We only delete insns that have a single_set. */
2513 if (TEST_BIT (pre_delete_map[bb->index], indx)
2514 && (set = single_set (insn)) != 0
2515 && dbg_cnt (pre_insn))
2517 /* Create a pseudo-reg to store the result of reaching
2518 expressions into. Get the mode for the new pseudo from
2519 the mode of the original destination pseudo. */
2520 if (expr->reaching_reg == NULL)
2521 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2523 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2524 delete_insn (insn);
2525 occr->deleted_p = 1;
2526 changed = 1;
2527 gcse_subst_count++;
2529 if (dump_file)
2531 fprintf (dump_file,
2532 "PRE: redundant insn %d (expression %d) in ",
2533 INSN_UID (insn), indx);
2534 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2535 bb->index, REGNO (expr->reaching_reg));
2541 return changed;
2544 /* Perform GCSE optimizations using PRE.
2545 This is called by one_pre_gcse_pass after all the dataflow analysis
2546 has been done.
2548 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2549 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2550 Compiler Design and Implementation.
2552 ??? A new pseudo reg is created to hold the reaching expression. The nice
2553 thing about the classical approach is that it would try to use an existing
2554 reg. If the register can't be adequately optimized [i.e. we introduce
2555 reload problems], one could add a pass here to propagate the new register
2556 through the block.
2558 ??? We don't handle single sets in PARALLELs because we're [currently] not
2559 able to copy the rest of the parallel when we insert copies to create full
2560 redundancies from partial redundancies. However, there's no reason why we
2561 can't handle PARALLELs in the cases where there are no partial
2562 redundancies. */
2564 static int
2565 pre_gcse (struct edge_list *edge_list)
2567 unsigned int i;
2568 int did_insert, changed;
2569 struct expr **index_map;
2570 struct expr *expr;
2572 /* Compute a mapping from expression number (`bitmap_index') to
2573 hash table entry. */
2575 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2576 for (i = 0; i < expr_hash_table.size; i++)
2577 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2578 index_map[expr->bitmap_index] = expr;
2580 /* Delete the redundant insns first so that
2581 - we know what register to use for the new insns and for the other
2582 ones with reaching expressions
2583 - we know which insns are redundant when we go to create copies */
2585 changed = pre_delete ();
2586 did_insert = pre_edge_insert (edge_list, index_map);
2588 /* In other places with reaching expressions, copy the expression to the
2589 specially allocated pseudo-reg that reaches the redundant expr. */
2590 pre_insert_copies ();
2591 if (did_insert)
2593 commit_edge_insertions ();
2594 changed = 1;
2597 free (index_map);
2598 return changed;
2601 /* Top level routine to perform one PRE GCSE pass.
2603 Return nonzero if a change was made. */
2605 static int
2606 one_pre_gcse_pass (void)
2608 int changed = 0;
2610 gcse_subst_count = 0;
2611 gcse_create_count = 0;
2613 /* Return if there's nothing to do, or it is too expensive. */
2614 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2615 || is_too_expensive (_("PRE disabled")))
2616 return 0;
2618 /* We need alias. */
2619 init_alias_analysis ();
2621 bytes_used = 0;
2622 gcc_obstack_init (&gcse_obstack);
2623 alloc_gcse_mem ();
2625 alloc_hash_table (&expr_hash_table);
2626 add_noreturn_fake_exit_edges ();
2627 if (flag_gcse_lm)
2628 compute_ld_motion_mems ();
2630 compute_hash_table (&expr_hash_table);
2631 if (flag_gcse_lm)
2632 trim_ld_motion_mems ();
2633 if (dump_file)
2634 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2636 if (expr_hash_table.n_elems > 0)
2638 struct edge_list *edge_list;
2639 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2640 edge_list = compute_pre_data ();
2641 changed |= pre_gcse (edge_list);
2642 free_edge_list (edge_list);
2643 free_pre_mem ();
2646 if (flag_gcse_lm)
2647 free_ld_motion_mems ();
2648 remove_fake_exit_edges ();
2649 free_hash_table (&expr_hash_table);
2651 free_gcse_mem ();
2652 obstack_free (&gcse_obstack, NULL);
2654 /* We are finished with alias. */
2655 end_alias_analysis ();
2657 if (dump_file)
2659 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2660 current_function_name (), n_basic_blocks, bytes_used);
2661 fprintf (dump_file, "%d substs, %d insns created\n",
2662 gcse_subst_count, gcse_create_count);
2665 return changed;
2668 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2669 to INSN. If such notes are added to an insn which references a
2670 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2671 that note, because the following loop optimization pass requires
2672 them. */
2674 /* ??? If there was a jump optimization pass after gcse and before loop,
2675 then we would not need to do this here, because jump would add the
2676 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2678 static void
2679 add_label_notes (rtx x, rtx insn)
2681 enum rtx_code code = GET_CODE (x);
2682 int i, j;
2683 const char *fmt;
2685 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2687 /* This code used to ignore labels that referred to dispatch tables to
2688 avoid flow generating (slightly) worse code.
2690 We no longer ignore such label references (see LABEL_REF handling in
2691 mark_jump_label for additional information). */
2693 /* There's no reason for current users to emit jump-insns with
2694 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2695 notes. */
2696 gcc_assert (!JUMP_P (insn));
2697 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2699 if (LABEL_P (XEXP (x, 0)))
2700 LABEL_NUSES (XEXP (x, 0))++;
2702 return;
2705 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2707 if (fmt[i] == 'e')
2708 add_label_notes (XEXP (x, i), insn);
2709 else if (fmt[i] == 'E')
2710 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2711 add_label_notes (XVECEXP (x, i, j), insn);
2715 /* Code Hoisting variables and subroutines. */
2717 /* Very busy expressions. */
2718 static sbitmap *hoist_vbein;
2719 static sbitmap *hoist_vbeout;
2721 /* ??? We could compute post dominators and run this algorithm in
2722 reverse to perform tail merging, doing so would probably be
2723 more effective than the tail merging code in jump.c.
2725 It's unclear if tail merging could be run in parallel with
2726 code hoisting. It would be nice. */
2728 /* Allocate vars used for code hoisting analysis. */
2730 static void
2731 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2733 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2734 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2735 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2737 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2738 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2741 /* Free vars used for code hoisting analysis. */
2743 static void
2744 free_code_hoist_mem (void)
2746 sbitmap_vector_free (antloc);
2747 sbitmap_vector_free (transp);
2748 sbitmap_vector_free (comp);
2750 sbitmap_vector_free (hoist_vbein);
2751 sbitmap_vector_free (hoist_vbeout);
2753 free_dominance_info (CDI_DOMINATORS);
2756 /* Compute the very busy expressions at entry/exit from each block.
2758 An expression is very busy if all paths from a given point
2759 compute the expression. */
2761 static void
2762 compute_code_hoist_vbeinout (void)
2764 int changed, passes;
2765 basic_block bb;
2767 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2768 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2770 passes = 0;
2771 changed = 1;
2773 while (changed)
2775 changed = 0;
2777 /* We scan the blocks in the reverse order to speed up
2778 the convergence. */
2779 FOR_EACH_BB_REVERSE (bb)
2781 if (bb->next_bb != EXIT_BLOCK_PTR)
2783 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2784 hoist_vbein, bb);
2786 /* Include expressions in VBEout that are calculated
2787 in BB and available at its end. */
2788 sbitmap_a_or_b (hoist_vbeout[bb->index],
2789 hoist_vbeout[bb->index], comp[bb->index]);
2792 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2793 antloc[bb->index],
2794 hoist_vbeout[bb->index],
2795 transp[bb->index]);
2798 passes++;
2801 if (dump_file)
2803 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2805 FOR_EACH_BB (bb)
2807 fprintf (dump_file, "vbein (%d): ", bb->index);
2808 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2809 fprintf (dump_file, "vbeout(%d): ", bb->index);
2810 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2815 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2817 static void
2818 compute_code_hoist_data (void)
2820 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2821 prune_expressions (false);
2822 compute_code_hoist_vbeinout ();
2823 calculate_dominance_info (CDI_DOMINATORS);
2824 if (dump_file)
2825 fprintf (dump_file, "\n");
2828 /* Determine if the expression identified by EXPR_INDEX would
2829 reach BB unimpared if it was placed at the end of EXPR_BB.
2830 Stop the search if the expression would need to be moved more
2831 than DISTANCE instructions.
2833 It's unclear exactly what Muchnick meant by "unimpared". It seems
2834 to me that the expression must either be computed or transparent in
2835 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2836 would allow the expression to be hoisted out of loops, even if
2837 the expression wasn't a loop invariant.
2839 Contrast this to reachability for PRE where an expression is
2840 considered reachable if *any* path reaches instead of *all*
2841 paths. */
2843 static int
2844 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2845 char *visited, int distance, int *bb_size)
2847 edge pred;
2848 edge_iterator ei;
2849 int visited_allocated_locally = 0;
2851 /* Terminate the search if distance, for which EXPR is allowed to move,
2852 is exhausted. */
2853 if (distance > 0)
2855 distance -= bb_size[bb->index];
2857 if (distance <= 0)
2858 return 0;
2860 else
2861 gcc_assert (distance == 0);
2863 if (visited == NULL)
2865 visited_allocated_locally = 1;
2866 visited = XCNEWVEC (char, last_basic_block);
2869 FOR_EACH_EDGE (pred, ei, bb->preds)
2871 basic_block pred_bb = pred->src;
2873 if (pred->src == ENTRY_BLOCK_PTR)
2874 break;
2875 else if (pred_bb == expr_bb)
2876 continue;
2877 else if (visited[pred_bb->index])
2878 continue;
2880 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2881 break;
2883 /* Not killed. */
2884 else
2886 visited[pred_bb->index] = 1;
2887 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2888 visited, distance, bb_size))
2889 break;
2892 if (visited_allocated_locally)
2893 free (visited);
2895 return (pred == NULL);
2898 /* Find occurrence in BB. */
2900 static struct occr *
2901 find_occr_in_bb (struct occr *occr, basic_block bb)
2903 /* Find the right occurrence of this expression. */
2904 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2905 occr = occr->next;
2907 return occr;
2910 /* Actually perform code hoisting. */
2912 static int
2913 hoist_code (void)
2915 basic_block bb, dominated;
2916 VEC (basic_block, heap) *dom_tree_walk;
2917 unsigned int dom_tree_walk_index;
2918 VEC (basic_block, heap) *domby;
2919 unsigned int i,j;
2920 struct expr **index_map;
2921 struct expr *expr;
2922 int *to_bb_head;
2923 int *bb_size;
2924 int changed = 0;
2926 /* Compute a mapping from expression number (`bitmap_index') to
2927 hash table entry. */
2929 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2930 for (i = 0; i < expr_hash_table.size; i++)
2931 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2932 index_map[expr->bitmap_index] = expr;
2934 /* Calculate sizes of basic blocks and note how far
2935 each instruction is from the start of its block. We then use this
2936 data to restrict distance an expression can travel. */
2938 to_bb_head = XCNEWVEC (int, get_max_uid ());
2939 bb_size = XCNEWVEC (int, last_basic_block);
2941 FOR_EACH_BB (bb)
2943 rtx insn;
2944 int to_head;
2946 to_head = 0;
2947 FOR_BB_INSNS (bb, insn)
2949 /* Don't count debug instructions to avoid them affecting
2950 decision choices. */
2951 if (NONDEBUG_INSN_P (insn))
2952 to_bb_head[INSN_UID (insn)] = to_head++;
2955 bb_size[bb->index] = to_head;
2958 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2959 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2960 == ENTRY_BLOCK_PTR->next_bb));
2962 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2963 ENTRY_BLOCK_PTR->next_bb);
2965 /* Walk over each basic block looking for potentially hoistable
2966 expressions, nothing gets hoisted from the entry block. */
2967 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2969 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2971 if (VEC_length (basic_block, domby) == 0)
2972 continue;
2974 /* Examine each expression that is very busy at the exit of this
2975 block. These are the potentially hoistable expressions. */
2976 for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
2978 if (TEST_BIT (hoist_vbeout[bb->index], i))
2980 /* Current expression. */
2981 struct expr *expr = index_map[i];
2982 /* Number of occurrences of EXPR that can be hoisted to BB. */
2983 int hoistable = 0;
2984 /* Basic blocks that have occurrences reachable from BB. */
2985 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2986 /* Occurrences reachable from BB. */
2987 VEC (occr_t, heap) *occrs_to_hoist = NULL;
2988 /* We want to insert the expression into BB only once, so
2989 note when we've inserted it. */
2990 int insn_inserted_p;
2991 occr_t occr;
2993 bitmap_initialize (from_bbs, 0);
2995 /* If an expression is computed in BB and is available at end of
2996 BB, hoist all occurrences dominated by BB to BB. */
2997 if (TEST_BIT (comp[bb->index], i))
2999 occr = find_occr_in_bb (expr->antic_occr, bb);
3001 if (occr)
3003 /* An occurrence might've been already deleted
3004 while processing a dominator of BB. */
3005 if (!occr->deleted_p)
3007 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3008 hoistable++;
3011 else
3012 hoistable++;
3015 /* We've found a potentially hoistable expression, now
3016 we look at every block BB dominates to see if it
3017 computes the expression. */
3018 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3020 int max_distance;
3022 /* Ignore self dominance. */
3023 if (bb == dominated)
3024 continue;
3025 /* We've found a dominated block, now see if it computes
3026 the busy expression and whether or not moving that
3027 expression to the "beginning" of that block is safe. */
3028 if (!TEST_BIT (antloc[dominated->index], i))
3029 continue;
3031 occr = find_occr_in_bb (expr->antic_occr, dominated);
3032 gcc_assert (occr);
3034 /* An occurrence might've been already deleted
3035 while processing a dominator of BB. */
3036 if (occr->deleted_p)
3037 continue;
3038 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3040 max_distance = expr->max_distance;
3041 if (max_distance > 0)
3042 /* Adjust MAX_DISTANCE to account for the fact that
3043 OCCR won't have to travel all of DOMINATED, but
3044 only part of it. */
3045 max_distance += (bb_size[dominated->index]
3046 - to_bb_head[INSN_UID (occr->insn)]);
3048 /* Note if the expression would reach the dominated block
3049 unimpared if it was placed at the end of BB.
3051 Keep track of how many times this expression is hoistable
3052 from a dominated block into BB. */
3053 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3054 max_distance, bb_size))
3056 hoistable++;
3057 VEC_safe_push (occr_t, heap,
3058 occrs_to_hoist, occr);
3059 bitmap_set_bit (from_bbs, dominated->index);
3063 /* If we found more than one hoistable occurrence of this
3064 expression, then note it in the vector of expressions to
3065 hoist. It makes no sense to hoist things which are computed
3066 in only one BB, and doing so tends to pessimize register
3067 allocation. One could increase this value to try harder
3068 to avoid any possible code expansion due to register
3069 allocation issues; however experiments have shown that
3070 the vast majority of hoistable expressions are only movable
3071 from two successors, so raising this threshold is likely
3072 to nullify any benefit we get from code hoisting. */
3073 if (hoistable > 1 && dbg_cnt (hoist_insn))
3075 /* If (hoistable != VEC_length), then there is
3076 an occurrence of EXPR in BB itself. Don't waste
3077 time looking for LCA in this case. */
3078 if ((unsigned) hoistable
3079 == VEC_length (occr_t, occrs_to_hoist))
3081 basic_block lca;
3083 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3084 from_bbs);
3085 if (lca != bb)
3086 /* Punt, it's better to hoist these occurrences to
3087 LCA. */
3088 VEC_free (occr_t, heap, occrs_to_hoist);
3091 else
3092 /* Punt, no point hoisting a single occurence. */
3093 VEC_free (occr_t, heap, occrs_to_hoist);
3095 insn_inserted_p = 0;
3097 /* Walk through occurrences of I'th expressions we want
3098 to hoist to BB and make the transformations. */
3099 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3101 rtx insn;
3102 rtx set;
3104 gcc_assert (!occr->deleted_p);
3106 insn = occr->insn;
3107 set = single_set (insn);
3108 gcc_assert (set);
3110 /* Create a pseudo-reg to store the result of reaching
3111 expressions into. Get the mode for the new pseudo
3112 from the mode of the original destination pseudo.
3114 It is important to use new pseudos whenever we
3115 emit a set. This will allow reload to use
3116 rematerialization for such registers. */
3117 if (!insn_inserted_p)
3118 expr->reaching_reg
3119 = gen_reg_rtx_and_attrs (SET_DEST (set));
3121 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3122 insn);
3123 delete_insn (insn);
3124 occr->deleted_p = 1;
3125 changed = 1;
3126 gcse_subst_count++;
3128 if (!insn_inserted_p)
3130 insert_insn_end_basic_block (expr, bb);
3131 insn_inserted_p = 1;
3135 VEC_free (occr_t, heap, occrs_to_hoist);
3136 bitmap_clear (from_bbs);
3139 VEC_free (basic_block, heap, domby);
3142 VEC_free (basic_block, heap, dom_tree_walk);
3143 free (bb_size);
3144 free (to_bb_head);
3145 free (index_map);
3147 return changed;
3150 /* Top level routine to perform one code hoisting (aka unification) pass
3152 Return nonzero if a change was made. */
3154 static int
3155 one_code_hoisting_pass (void)
3157 int changed = 0;
3159 gcse_subst_count = 0;
3160 gcse_create_count = 0;
3162 /* Return if there's nothing to do, or it is too expensive. */
3163 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3164 || is_too_expensive (_("GCSE disabled")))
3165 return 0;
3167 doing_code_hoisting_p = true;
3169 /* We need alias. */
3170 init_alias_analysis ();
3172 bytes_used = 0;
3173 gcc_obstack_init (&gcse_obstack);
3174 alloc_gcse_mem ();
3176 alloc_hash_table (&expr_hash_table);
3177 compute_hash_table (&expr_hash_table);
3178 if (dump_file)
3179 dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3181 if (expr_hash_table.n_elems > 0)
3183 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3184 compute_code_hoist_data ();
3185 changed = hoist_code ();
3186 free_code_hoist_mem ();
3189 free_hash_table (&expr_hash_table);
3190 free_gcse_mem ();
3191 obstack_free (&gcse_obstack, NULL);
3193 /* We are finished with alias. */
3194 end_alias_analysis ();
3196 if (dump_file)
3198 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3199 current_function_name (), n_basic_blocks, bytes_used);
3200 fprintf (dump_file, "%d substs, %d insns created\n",
3201 gcse_subst_count, gcse_create_count);
3204 doing_code_hoisting_p = false;
3206 return changed;
3209 /* Here we provide the things required to do store motion towards the exit.
3210 In order for this to be effective, gcse also needed to be taught how to
3211 move a load when it is killed only by a store to itself.
3213 int i;
3214 float a[10];
3216 void foo(float scale)
3218 for (i=0; i<10; i++)
3219 a[i] *= scale;
3222 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3223 the load out since its live around the loop, and stored at the bottom
3224 of the loop.
3226 The 'Load Motion' referred to and implemented in this file is
3227 an enhancement to gcse which when using edge based LCM, recognizes
3228 this situation and allows gcse to move the load out of the loop.
3230 Once gcse has hoisted the load, store motion can then push this
3231 load towards the exit, and we end up with no loads or stores of 'i'
3232 in the loop. */
3234 static hashval_t
3235 pre_ldst_expr_hash (const void *p)
3237 int do_not_record_p = 0;
3238 const struct ls_expr *const x = (const struct ls_expr *) p;
3239 return
3240 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3243 static int
3244 pre_ldst_expr_eq (const void *p1, const void *p2)
3246 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3247 *const ptr2 = (const struct ls_expr *) p2;
3248 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3251 /* This will search the ldst list for a matching expression. If it
3252 doesn't find one, we create one and initialize it. */
3254 static struct ls_expr *
3255 ldst_entry (rtx x)
3257 int do_not_record_p = 0;
3258 struct ls_expr * ptr;
3259 unsigned int hash;
3260 void **slot;
3261 struct ls_expr e;
3263 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3264 NULL, /*have_reg_qty=*/false);
3266 e.pattern = x;
3267 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3268 if (*slot)
3269 return (struct ls_expr *)*slot;
3271 ptr = XNEW (struct ls_expr);
3273 ptr->next = pre_ldst_mems;
3274 ptr->expr = NULL;
3275 ptr->pattern = x;
3276 ptr->pattern_regs = NULL_RTX;
3277 ptr->loads = NULL_RTX;
3278 ptr->stores = NULL_RTX;
3279 ptr->reaching_reg = NULL_RTX;
3280 ptr->invalid = 0;
3281 ptr->index = 0;
3282 ptr->hash_index = hash;
3283 pre_ldst_mems = ptr;
3284 *slot = ptr;
3286 return ptr;
3289 /* Free up an individual ldst entry. */
3291 static void
3292 free_ldst_entry (struct ls_expr * ptr)
3294 free_INSN_LIST_list (& ptr->loads);
3295 free_INSN_LIST_list (& ptr->stores);
3297 free (ptr);
3300 /* Free up all memory associated with the ldst list. */
3302 static void
3303 free_ld_motion_mems (void)
3305 if (pre_ldst_table)
3306 htab_delete (pre_ldst_table);
3307 pre_ldst_table = NULL;
3309 while (pre_ldst_mems)
3311 struct ls_expr * tmp = pre_ldst_mems;
3313 pre_ldst_mems = pre_ldst_mems->next;
3315 free_ldst_entry (tmp);
3318 pre_ldst_mems = NULL;
3321 /* Dump debugging info about the ldst list. */
3323 static void
3324 print_ldst_list (FILE * file)
3326 struct ls_expr * ptr;
3328 fprintf (file, "LDST list: \n");
3330 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3332 fprintf (file, " Pattern (%3d): ", ptr->index);
3334 print_rtl (file, ptr->pattern);
3336 fprintf (file, "\n Loads : ");
3338 if (ptr->loads)
3339 print_rtl (file, ptr->loads);
3340 else
3341 fprintf (file, "(nil)");
3343 fprintf (file, "\n Stores : ");
3345 if (ptr->stores)
3346 print_rtl (file, ptr->stores);
3347 else
3348 fprintf (file, "(nil)");
3350 fprintf (file, "\n\n");
3353 fprintf (file, "\n");
3356 /* Returns 1 if X is in the list of ldst only expressions. */
3358 static struct ls_expr *
3359 find_rtx_in_ldst (rtx x)
3361 struct ls_expr e;
3362 void **slot;
3363 if (!pre_ldst_table)
3364 return NULL;
3365 e.pattern = x;
3366 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3367 if (!slot || ((struct ls_expr *)*slot)->invalid)
3368 return NULL;
3369 return (struct ls_expr *) *slot;
3372 /* Load Motion for loads which only kill themselves. */
3374 /* Return true if x, a MEM, is a simple access with no side effects.
3375 These are the types of loads we consider for the ld_motion list,
3376 otherwise we let the usual aliasing take care of it. */
3378 static int
3379 simple_mem (const_rtx x)
3381 if (MEM_VOLATILE_P (x))
3382 return 0;
3384 if (GET_MODE (x) == BLKmode)
3385 return 0;
3387 /* If we are handling exceptions, we must be careful with memory references
3388 that may trap. If we are not, the behavior is undefined, so we may just
3389 continue. */
3390 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3391 return 0;
3393 if (side_effects_p (x))
3394 return 0;
3396 /* Do not consider function arguments passed on stack. */
3397 if (reg_mentioned_p (stack_pointer_rtx, x))
3398 return 0;
3400 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3401 return 0;
3403 return 1;
3406 /* Make sure there isn't a buried reference in this pattern anywhere.
3407 If there is, invalidate the entry for it since we're not capable
3408 of fixing it up just yet.. We have to be sure we know about ALL
3409 loads since the aliasing code will allow all entries in the
3410 ld_motion list to not-alias itself. If we miss a load, we will get
3411 the wrong value since gcse might common it and we won't know to
3412 fix it up. */
3414 static void
3415 invalidate_any_buried_refs (rtx x)
3417 const char * fmt;
3418 int i, j;
3419 struct ls_expr * ptr;
3421 /* Invalidate it in the list. */
3422 if (MEM_P (x) && simple_mem (x))
3424 ptr = ldst_entry (x);
3425 ptr->invalid = 1;
3428 /* Recursively process the insn. */
3429 fmt = GET_RTX_FORMAT (GET_CODE (x));
3431 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3433 if (fmt[i] == 'e')
3434 invalidate_any_buried_refs (XEXP (x, i));
3435 else if (fmt[i] == 'E')
3436 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3437 invalidate_any_buried_refs (XVECEXP (x, i, j));
3441 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3442 being defined as MEM loads and stores to symbols, with no side effects
3443 and no registers in the expression. For a MEM destination, we also
3444 check that the insn is still valid if we replace the destination with a
3445 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3446 which don't match this criteria, they are invalidated and trimmed out
3447 later. */
3449 static void
3450 compute_ld_motion_mems (void)
3452 struct ls_expr * ptr;
3453 basic_block bb;
3454 rtx insn;
3456 pre_ldst_mems = NULL;
3457 pre_ldst_table
3458 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3460 FOR_EACH_BB (bb)
3462 FOR_BB_INSNS (bb, insn)
3464 if (NONDEBUG_INSN_P (insn))
3466 if (GET_CODE (PATTERN (insn)) == SET)
3468 rtx src = SET_SRC (PATTERN (insn));
3469 rtx dest = SET_DEST (PATTERN (insn));
3471 /* Check for a simple LOAD... */
3472 if (MEM_P (src) && simple_mem (src))
3474 ptr = ldst_entry (src);
3475 if (REG_P (dest))
3476 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3477 else
3478 ptr->invalid = 1;
3480 else
3482 /* Make sure there isn't a buried load somewhere. */
3483 invalidate_any_buried_refs (src);
3486 /* Check for stores. Don't worry about aliased ones, they
3487 will block any movement we might do later. We only care
3488 about this exact pattern since those are the only
3489 circumstance that we will ignore the aliasing info. */
3490 if (MEM_P (dest) && simple_mem (dest))
3492 ptr = ldst_entry (dest);
3494 if (! MEM_P (src)
3495 && GET_CODE (src) != ASM_OPERANDS
3496 /* Check for REG manually since want_to_gcse_p
3497 returns 0 for all REGs. */
3498 && can_assign_to_reg_without_clobbers_p (src))
3499 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3500 else
3501 ptr->invalid = 1;
3504 else
3505 invalidate_any_buried_refs (PATTERN (insn));
3511 /* Remove any references that have been either invalidated or are not in the
3512 expression list for pre gcse. */
3514 static void
3515 trim_ld_motion_mems (void)
3517 struct ls_expr * * last = & pre_ldst_mems;
3518 struct ls_expr * ptr = pre_ldst_mems;
3520 while (ptr != NULL)
3522 struct expr * expr;
3524 /* Delete if entry has been made invalid. */
3525 if (! ptr->invalid)
3527 /* Delete if we cannot find this mem in the expression list. */
3528 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3530 for (expr = expr_hash_table.table[hash];
3531 expr != NULL;
3532 expr = expr->next_same_hash)
3533 if (expr_equiv_p (expr->expr, ptr->pattern))
3534 break;
3536 else
3537 expr = (struct expr *) 0;
3539 if (expr)
3541 /* Set the expression field if we are keeping it. */
3542 ptr->expr = expr;
3543 last = & ptr->next;
3544 ptr = ptr->next;
3546 else
3548 *last = ptr->next;
3549 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3550 free_ldst_entry (ptr);
3551 ptr = * last;
3555 /* Show the world what we've found. */
3556 if (dump_file && pre_ldst_mems != NULL)
3557 print_ldst_list (dump_file);
3560 /* This routine will take an expression which we are replacing with
3561 a reaching register, and update any stores that are needed if
3562 that expression is in the ld_motion list. Stores are updated by
3563 copying their SRC to the reaching register, and then storing
3564 the reaching register into the store location. These keeps the
3565 correct value in the reaching register for the loads. */
3567 static void
3568 update_ld_motion_stores (struct expr * expr)
3570 struct ls_expr * mem_ptr;
3572 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3574 /* We can try to find just the REACHED stores, but is shouldn't
3575 matter to set the reaching reg everywhere... some might be
3576 dead and should be eliminated later. */
3578 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3579 where reg is the reaching reg used in the load. We checked in
3580 compute_ld_motion_mems that we can replace (set mem expr) with
3581 (set reg expr) in that insn. */
3582 rtx list = mem_ptr->stores;
3584 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3586 rtx insn = XEXP (list, 0);
3587 rtx pat = PATTERN (insn);
3588 rtx src = SET_SRC (pat);
3589 rtx reg = expr->reaching_reg;
3590 rtx copy;
3592 /* If we've already copied it, continue. */
3593 if (expr->reaching_reg == src)
3594 continue;
3596 if (dump_file)
3598 fprintf (dump_file, "PRE: store updated with reaching reg ");
3599 print_rtl (dump_file, reg);
3600 fprintf (dump_file, ":\n ");
3601 print_inline_rtx (dump_file, insn, 8);
3602 fprintf (dump_file, "\n");
3605 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3606 emit_insn_before (copy, insn);
3607 SET_SRC (pat) = reg;
3608 df_insn_rescan (insn);
3610 /* un-recognize this pattern since it's probably different now. */
3611 INSN_CODE (insn) = -1;
3612 gcse_create_count++;
3617 /* Return true if the graph is too expensive to optimize. PASS is the
3618 optimization about to be performed. */
3620 static bool
3621 is_too_expensive (const char *pass)
3623 /* Trying to perform global optimizations on flow graphs which have
3624 a high connectivity will take a long time and is unlikely to be
3625 particularly useful.
3627 In normal circumstances a cfg should have about twice as many
3628 edges as blocks. But we do not want to punish small functions
3629 which have a couple switch statements. Rather than simply
3630 threshold the number of blocks, uses something with a more
3631 graceful degradation. */
3632 if (n_edges > 20000 + n_basic_blocks * 4)
3634 warning (OPT_Wdisabled_optimization,
3635 "%s: %d basic blocks and %d edges/basic block",
3636 pass, n_basic_blocks, n_edges / n_basic_blocks);
3638 return true;
3641 /* If allocating memory for the dataflow bitmaps would take up too much
3642 storage it's better just to disable the optimization. */
3643 if ((n_basic_blocks
3644 * SBITMAP_SET_SIZE (max_reg_num ())
3645 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3647 warning (OPT_Wdisabled_optimization,
3648 "%s: %d basic blocks and %d registers",
3649 pass, n_basic_blocks, max_reg_num ());
3651 return true;
3654 return false;
3657 /* All the passes implemented in this file. Each pass has its
3658 own gate and execute function, and at the end of the file a
3659 pass definition for passes.c.
3661 We do not construct an accurate cfg in functions which call
3662 setjmp, so none of these passes runs if the function calls
3663 setjmp.
3664 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3666 static bool
3667 gate_rtl_pre (void)
3669 return optimize > 0 && flag_gcse
3670 && !cfun->calls_setjmp
3671 && optimize_function_for_speed_p (cfun)
3672 && dbg_cnt (pre);
3675 static unsigned int
3676 execute_rtl_pre (void)
3678 int changed;
3679 delete_unreachable_blocks ();
3680 df_analyze ();
3681 changed = one_pre_gcse_pass ();
3682 flag_rerun_cse_after_global_opts |= changed;
3683 if (changed)
3684 cleanup_cfg (0);
3685 return 0;
3688 static bool
3689 gate_rtl_hoist (void)
3691 return optimize > 0 && flag_gcse
3692 && !cfun->calls_setjmp
3693 /* It does not make sense to run code hoisting unless we are optimizing
3694 for code size -- it rarely makes programs faster, and can make then
3695 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3696 && optimize_function_for_size_p (cfun)
3697 && dbg_cnt (hoist);
3700 static unsigned int
3701 execute_rtl_hoist (void)
3703 int changed;
3704 delete_unreachable_blocks ();
3705 df_analyze ();
3706 changed = one_code_hoisting_pass ();
3707 flag_rerun_cse_after_global_opts |= changed;
3708 if (changed)
3709 cleanup_cfg (0);
3710 return 0;
3713 struct rtl_opt_pass pass_rtl_pre =
3716 RTL_PASS,
3717 "rtl pre", /* name */
3718 gate_rtl_pre, /* gate */
3719 execute_rtl_pre, /* execute */
3720 NULL, /* sub */
3721 NULL, /* next */
3722 0, /* static_pass_number */
3723 TV_PRE, /* tv_id */
3724 PROP_cfglayout, /* properties_required */
3725 0, /* properties_provided */
3726 0, /* properties_destroyed */
3727 0, /* todo_flags_start */
3728 TODO_df_finish | TODO_verify_rtl_sharing |
3729 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3733 struct rtl_opt_pass pass_rtl_hoist =
3736 RTL_PASS,
3737 "hoist", /* name */
3738 gate_rtl_hoist, /* gate */
3739 execute_rtl_hoist, /* execute */
3740 NULL, /* sub */
3741 NULL, /* next */
3742 0, /* static_pass_number */
3743 TV_HOIST, /* tv_id */
3744 PROP_cfglayout, /* properties_required */
3745 0, /* properties_provided */
3746 0, /* properties_destroyed */
3747 0, /* todo_flags_start */
3748 TODO_df_finish | TODO_verify_rtl_sharing |
3749 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3753 #include "gt-gcse.h"