re PR bootstrap/54281 (Fails to bootstrap with --disable-nls)
[official-gcc.git] / gcc / gcse.c
blobfd285de5d88b703d1262ff34839180a9cac365e7
1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
28 /* References searched while implementing this.
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
36 communications of the acm, Vol. 22, Num. 2, Feb. 1979
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "function.h"
154 #include "expr.h"
155 #include "except.h"
156 #include "ggc.h"
157 #include "params.h"
158 #include "cselib.h"
159 #include "intl.h"
160 #include "obstack.h"
161 #include "tree-pass.h"
162 #include "hashtab.h"
163 #include "df.h"
164 #include "dbgcnt.h"
165 #include "target.h"
166 #include "gcse.h"
168 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
169 are a superset of those done by classic GCSE.
171 Two passes of copy/constant propagation are done around PRE or hoisting
172 because the first one enables more GCSE and the second one helps to clean
173 up the copies that PRE and HOIST create. This is needed more for PRE than
174 for HOIST because code hoisting will try to use an existing register
175 containing the common subexpression rather than create a new one. This is
176 harder to do for PRE because of the code motion (which HOIST doesn't do).
178 Expressions we are interested in GCSE-ing are of the form
179 (set (pseudo-reg) (expression)).
180 Function want_to_gcse_p says what these are.
182 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
183 This allows PRE to hoist expressions that are expressed in multiple insns,
184 such as complex address calculations (e.g. for PIC code, or loads with a
185 high part and a low part).
187 PRE handles moving invariant expressions out of loops (by treating them as
188 partially redundant).
190 **********************
192 We used to support multiple passes but there are diminishing returns in
193 doing so. The first pass usually makes 90% of the changes that are doable.
194 A second pass can make a few more changes made possible by the first pass.
195 Experiments show any further passes don't make enough changes to justify
196 the expense.
198 A study of spec92 using an unlimited number of passes:
199 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
200 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
201 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
203 It was found doing copy propagation between each pass enables further
204 substitutions.
206 This study was done before expressions in REG_EQUAL notes were added as
207 candidate expressions for optimization, and before the GIMPLE optimizers
208 were added. Probably, multiple passes is even less efficient now than
209 at the time when the study was conducted.
211 PRE is quite expensive in complicated functions because the DFA can take
212 a while to converge. Hence we only perform one pass.
214 **********************
216 The steps for PRE are:
218 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
220 2) Perform the data flow analysis for PRE.
222 3) Delete the redundant instructions
224 4) Insert the required copies [if any] that make the partially
225 redundant instructions fully redundant.
227 5) For other reaching expressions, insert an instruction to copy the value
228 to a newly created pseudo that will reach the redundant instruction.
230 The deletion is done first so that when we do insertions we
231 know which pseudo reg to use.
233 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
234 argue it is not. The number of iterations for the algorithm to converge
235 is typically 2-4 so I don't view it as that expensive (relatively speaking).
237 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
238 we create. To make an expression reach the place where it's redundant,
239 the result of the expression is copied to a new register, and the redundant
240 expression is deleted by replacing it with this new register. Classic GCSE
241 doesn't have this problem as much as it computes the reaching defs of
242 each register in each block and thus can try to use an existing
243 register. */
245 /* GCSE global vars. */
247 struct target_gcse default_target_gcse;
248 #if SWITCHABLE_TARGET
249 struct target_gcse *this_target_gcse = &default_target_gcse;
250 #endif
252 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
253 int flag_rerun_cse_after_global_opts;
255 /* An obstack for our working variables. */
256 static struct obstack gcse_obstack;
258 struct reg_use {rtx reg_rtx; };
260 /* Hash table of expressions. */
262 struct expr
264 /* The expression. */
265 rtx expr;
266 /* Index in the available expression bitmaps. */
267 int bitmap_index;
268 /* Next entry with the same hash. */
269 struct expr *next_same_hash;
270 /* List of anticipatable occurrences in basic blocks in the function.
271 An "anticipatable occurrence" is one that is the first occurrence in the
272 basic block, the operands are not modified in the basic block prior
273 to the occurrence and the output is not used between the start of
274 the block and the occurrence. */
275 struct occr *antic_occr;
276 /* List of available occurrence in basic blocks in the function.
277 An "available occurrence" is one that is the last occurrence in the
278 basic block and the operands are not modified by following statements in
279 the basic block [including this insn]. */
280 struct occr *avail_occr;
281 /* Non-null if the computation is PRE redundant.
282 The value is the newly created pseudo-reg to record a copy of the
283 expression in all the places that reach the redundant copy. */
284 rtx reaching_reg;
285 /* Maximum distance in instructions this expression can travel.
286 We avoid moving simple expressions for more than a few instructions
287 to keep register pressure under control.
288 A value of "0" removes restrictions on how far the expression can
289 travel. */
290 int max_distance;
293 /* Occurrence of an expression.
294 There is one per basic block. If a pattern appears more than once the
295 last appearance is used [or first for anticipatable expressions]. */
297 struct occr
299 /* Next occurrence of this expression. */
300 struct occr *next;
301 /* The insn that computes the expression. */
302 rtx insn;
303 /* Nonzero if this [anticipatable] occurrence has been deleted. */
304 char deleted_p;
305 /* Nonzero if this [available] occurrence has been copied to
306 reaching_reg. */
307 /* ??? This is mutually exclusive with deleted_p, so they could share
308 the same byte. */
309 char copied_p;
312 typedef struct occr *occr_t;
313 DEF_VEC_P (occr_t);
314 DEF_VEC_ALLOC_P (occr_t, heap);
316 /* Expression hash tables.
317 Each hash table is an array of buckets.
318 ??? It is known that if it were an array of entries, structure elements
319 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
320 not clear whether in the final analysis a sufficient amount of memory would
321 be saved as the size of the available expression bitmaps would be larger
322 [one could build a mapping table without holes afterwards though].
323 Someday I'll perform the computation and figure it out. */
325 struct hash_table_d
327 /* The table itself.
328 This is an array of `expr_hash_table_size' elements. */
329 struct expr **table;
331 /* Size of the hash table, in elements. */
332 unsigned int size;
334 /* Number of hash table elements. */
335 unsigned int n_elems;
338 /* Expression hash table. */
339 static struct hash_table_d expr_hash_table;
341 /* This is a list of expressions which are MEMs and will be used by load
342 or store motion.
343 Load motion tracks MEMs which aren't killed by anything except itself,
344 i.e. loads and stores to a single location.
345 We can then allow movement of these MEM refs with a little special
346 allowance. (all stores copy the same value to the reaching reg used
347 for the loads). This means all values used to store into memory must have
348 no side effects so we can re-issue the setter value. */
350 struct ls_expr
352 struct expr * expr; /* Gcse expression reference for LM. */
353 rtx pattern; /* Pattern of this mem. */
354 rtx pattern_regs; /* List of registers mentioned by the mem. */
355 rtx loads; /* INSN list of loads seen. */
356 rtx stores; /* INSN list of stores seen. */
357 struct ls_expr * next; /* Next in the list. */
358 int invalid; /* Invalid for some reason. */
359 int index; /* If it maps to a bitmap index. */
360 unsigned int hash_index; /* Index when in a hash table. */
361 rtx reaching_reg; /* Register to use when re-writing. */
364 /* Head of the list of load/store memory refs. */
365 static struct ls_expr * pre_ldst_mems = NULL;
367 /* Hashtable for the load/store memory refs. */
368 static htab_t pre_ldst_table = NULL;
370 /* Bitmap containing one bit for each register in the program.
371 Used when performing GCSE to track which registers have been set since
372 the start of the basic block. */
373 static regset reg_set_bitmap;
375 /* Array, indexed by basic block number for a list of insns which modify
376 memory within that block. */
377 static VEC (rtx,heap) **modify_mem_list;
378 static bitmap modify_mem_list_set;
380 typedef struct modify_pair_s
382 rtx dest; /* A MEM. */
383 rtx dest_addr; /* The canonical address of `dest'. */
384 } modify_pair;
386 DEF_VEC_O(modify_pair);
387 DEF_VEC_ALLOC_O(modify_pair,heap);
389 /* This array parallels modify_mem_list, except that it stores MEMs
390 being set and their canonicalized memory addresses. */
391 static VEC (modify_pair,heap) **canon_modify_mem_list;
393 /* Bitmap indexed by block numbers to record which blocks contain
394 function calls. */
395 static bitmap blocks_with_calls;
397 /* Various variables for statistics gathering. */
399 /* Memory used in a pass.
400 This isn't intended to be absolutely precise. Its intent is only
401 to keep an eye on memory usage. */
402 static int bytes_used;
404 /* GCSE substitutions made. */
405 static int gcse_subst_count;
406 /* Number of copy instructions created. */
407 static int gcse_create_count;
409 /* Doing code hoisting. */
410 static bool doing_code_hoisting_p = false;
412 /* For available exprs */
413 static sbitmap *ae_kill;
415 static void compute_can_copy (void);
416 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
417 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
418 static void *gcse_alloc (unsigned long);
419 static void alloc_gcse_mem (void);
420 static void free_gcse_mem (void);
421 static void hash_scan_insn (rtx, struct hash_table_d *);
422 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
423 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
424 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
425 static int want_to_gcse_p (rtx, int *);
426 static int oprs_unchanged_p (const_rtx, const_rtx, int);
427 static int oprs_anticipatable_p (const_rtx, const_rtx);
428 static int oprs_available_p (const_rtx, const_rtx);
429 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
430 struct hash_table_d *);
431 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
432 static int expr_equiv_p (const_rtx, const_rtx);
433 static void record_last_reg_set_info (rtx, int);
434 static void record_last_mem_set_info (rtx);
435 static void record_last_set_info (rtx, const_rtx, void *);
436 static void compute_hash_table (struct hash_table_d *);
437 static void alloc_hash_table (struct hash_table_d *);
438 static void free_hash_table (struct hash_table_d *);
439 static void compute_hash_table_work (struct hash_table_d *);
440 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
441 static void compute_transp (const_rtx, int, sbitmap *);
442 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
443 struct hash_table_d *);
444 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
445 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
446 static void canon_list_insert (rtx, const_rtx, void *);
447 static void alloc_pre_mem (int, int);
448 static void free_pre_mem (void);
449 static struct edge_list *compute_pre_data (void);
450 static int pre_expr_reaches_here_p (basic_block, struct expr *,
451 basic_block);
452 static void insert_insn_end_basic_block (struct expr *, basic_block);
453 static void pre_insert_copy_insn (struct expr *, rtx);
454 static void pre_insert_copies (void);
455 static int pre_delete (void);
456 static int pre_gcse (struct edge_list *);
457 static int one_pre_gcse_pass (void);
458 static void add_label_notes (rtx, rtx);
459 static void alloc_code_hoist_mem (int, int);
460 static void free_code_hoist_mem (void);
461 static void compute_code_hoist_vbeinout (void);
462 static void compute_code_hoist_data (void);
463 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
464 int, int *);
465 static int hoist_code (void);
466 static int one_code_hoisting_pass (void);
467 static rtx process_insert_insn (struct expr *);
468 static int pre_edge_insert (struct edge_list *, struct expr **);
469 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
470 basic_block, char *);
471 static struct ls_expr * ldst_entry (rtx);
472 static void free_ldst_entry (struct ls_expr *);
473 static void free_ld_motion_mems (void);
474 static void print_ldst_list (FILE *);
475 static struct ls_expr * find_rtx_in_ldst (rtx);
476 static int simple_mem (const_rtx);
477 static void invalidate_any_buried_refs (rtx);
478 static void compute_ld_motion_mems (void);
479 static void trim_ld_motion_mems (void);
480 static void update_ld_motion_stores (struct expr *);
481 static void clear_modify_mem_tables (void);
482 static void free_modify_mem_tables (void);
483 static rtx gcse_emit_move_after (rtx, rtx, rtx);
484 static bool is_too_expensive (const char *);
486 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
487 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
489 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
490 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
492 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
493 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
495 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
496 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
498 /* Misc. utilities. */
500 #define can_copy \
501 (this_target_gcse->x_can_copy)
502 #define can_copy_init_p \
503 (this_target_gcse->x_can_copy_init_p)
505 /* Compute which modes support reg/reg copy operations. */
507 static void
508 compute_can_copy (void)
510 int i;
511 #ifndef AVOID_CCMODE_COPIES
512 rtx reg, insn;
513 #endif
514 memset (can_copy, 0, NUM_MACHINE_MODES);
516 start_sequence ();
517 for (i = 0; i < NUM_MACHINE_MODES; i++)
518 if (GET_MODE_CLASS (i) == MODE_CC)
520 #ifdef AVOID_CCMODE_COPIES
521 can_copy[i] = 0;
522 #else
523 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
524 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
525 if (recog (PATTERN (insn), insn, NULL) >= 0)
526 can_copy[i] = 1;
527 #endif
529 else
530 can_copy[i] = 1;
532 end_sequence ();
535 /* Returns whether the mode supports reg/reg copy operations. */
537 bool
538 can_copy_p (enum machine_mode mode)
540 if (! can_copy_init_p)
542 compute_can_copy ();
543 can_copy_init_p = true;
546 return can_copy[mode] != 0;
549 /* Cover function to xmalloc to record bytes allocated. */
551 static void *
552 gmalloc (size_t size)
554 bytes_used += size;
555 return xmalloc (size);
558 /* Cover function to xcalloc to record bytes allocated. */
560 static void *
561 gcalloc (size_t nelem, size_t elsize)
563 bytes_used += nelem * elsize;
564 return xcalloc (nelem, elsize);
567 /* Cover function to obstack_alloc. */
569 static void *
570 gcse_alloc (unsigned long size)
572 bytes_used += size;
573 return obstack_alloc (&gcse_obstack, size);
576 /* Allocate memory for the reg/memory set tracking tables.
577 This is called at the start of each pass. */
579 static void
580 alloc_gcse_mem (void)
582 /* Allocate vars to track sets of regs. */
583 reg_set_bitmap = ALLOC_REG_SET (NULL);
585 /* Allocate array to keep a list of insns which modify memory in each
586 basic block. */
587 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
588 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
589 last_basic_block);
590 modify_mem_list_set = BITMAP_ALLOC (NULL);
591 blocks_with_calls = BITMAP_ALLOC (NULL);
594 /* Free memory allocated by alloc_gcse_mem. */
596 static void
597 free_gcse_mem (void)
599 FREE_REG_SET (reg_set_bitmap);
601 free_modify_mem_tables ();
602 BITMAP_FREE (modify_mem_list_set);
603 BITMAP_FREE (blocks_with_calls);
606 /* Compute the local properties of each recorded expression.
608 Local properties are those that are defined by the block, irrespective of
609 other blocks.
611 An expression is transparent in a block if its operands are not modified
612 in the block.
614 An expression is computed (locally available) in a block if it is computed
615 at least once and expression would contain the same value if the
616 computation was moved to the end of the block.
618 An expression is locally anticipatable in a block if it is computed at
619 least once and expression would contain the same value if the computation
620 was moved to the beginning of the block.
622 We call this routine for pre and code hoisting. They all compute
623 basically the same information and thus can easily share this code.
625 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
626 properties. If NULL, then it is not necessary to compute or record that
627 particular property.
629 TABLE controls which hash table to look at. */
631 static void
632 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
633 struct hash_table_d *table)
635 unsigned int i;
637 /* Initialize any bitmaps that were passed in. */
638 if (transp)
640 sbitmap_vector_ones (transp, last_basic_block);
643 if (comp)
644 sbitmap_vector_zero (comp, last_basic_block);
645 if (antloc)
646 sbitmap_vector_zero (antloc, last_basic_block);
648 for (i = 0; i < table->size; i++)
650 struct expr *expr;
652 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
654 int indx = expr->bitmap_index;
655 struct occr *occr;
657 /* The expression is transparent in this block if it is not killed.
658 We start by assuming all are transparent [none are killed], and
659 then reset the bits for those that are. */
660 if (transp)
661 compute_transp (expr->expr, indx, transp);
663 /* The occurrences recorded in antic_occr are exactly those that
664 we want to set to nonzero in ANTLOC. */
665 if (antloc)
666 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
668 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
670 /* While we're scanning the table, this is a good place to
671 initialize this. */
672 occr->deleted_p = 0;
675 /* The occurrences recorded in avail_occr are exactly those that
676 we want to set to nonzero in COMP. */
677 if (comp)
678 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
680 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
682 /* While we're scanning the table, this is a good place to
683 initialize this. */
684 occr->copied_p = 0;
687 /* While we're scanning the table, this is a good place to
688 initialize this. */
689 expr->reaching_reg = 0;
694 /* Hash table support. */
696 struct reg_avail_info
698 basic_block last_bb;
699 int first_set;
700 int last_set;
703 static struct reg_avail_info *reg_avail_info;
704 static basic_block current_bb;
706 /* See whether X, the source of a set, is something we want to consider for
707 GCSE. */
709 static int
710 want_to_gcse_p (rtx x, int *max_distance_ptr)
712 #ifdef STACK_REGS
713 /* On register stack architectures, don't GCSE constants from the
714 constant pool, as the benefits are often swamped by the overhead
715 of shuffling the register stack between basic blocks. */
716 if (IS_STACK_MODE (GET_MODE (x)))
717 x = avoid_constant_pool_reference (x);
718 #endif
720 /* GCSE'ing constants:
722 We do not specifically distinguish between constant and non-constant
723 expressions in PRE and Hoist. We use set_src_cost below to limit
724 the maximum distance simple expressions can travel.
726 Nevertheless, constants are much easier to GCSE, and, hence,
727 it is easy to overdo the optimizations. Usually, excessive PRE and
728 Hoisting of constant leads to increased register pressure.
730 RA can deal with this by rematerialing some of the constants.
731 Therefore, it is important that the back-end generates sets of constants
732 in a way that allows reload rematerialize them under high register
733 pressure, i.e., a pseudo register with REG_EQUAL to constant
734 is set only once. Failing to do so will result in IRA/reload
735 spilling such constants under high register pressure instead of
736 rematerializing them. */
738 switch (GET_CODE (x))
740 case REG:
741 case SUBREG:
742 case CALL:
743 return 0;
745 case CONST_INT:
746 case CONST_DOUBLE:
747 case CONST_FIXED:
748 case CONST_VECTOR:
749 if (!doing_code_hoisting_p)
750 /* Do not PRE constants. */
751 return 0;
753 /* FALLTHRU */
755 default:
756 if (doing_code_hoisting_p)
757 /* PRE doesn't implement max_distance restriction. */
759 int cost;
760 int max_distance;
762 gcc_assert (!optimize_function_for_speed_p (cfun)
763 && optimize_function_for_size_p (cfun));
764 cost = set_src_cost (x, 0);
766 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
768 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
769 if (max_distance == 0)
770 return 0;
772 gcc_assert (max_distance > 0);
774 else
775 max_distance = 0;
777 if (max_distance_ptr)
778 *max_distance_ptr = max_distance;
781 return can_assign_to_reg_without_clobbers_p (x);
785 /* Used internally by can_assign_to_reg_without_clobbers_p. */
787 static GTY(()) rtx test_insn;
789 /* Return true if we can assign X to a pseudo register such that the
790 resulting insn does not result in clobbering a hard register as a
791 side-effect.
793 Additionally, if the target requires it, check that the resulting insn
794 can be copied. If it cannot, this means that X is special and probably
795 has hidden side-effects we don't want to mess with.
797 This function is typically used by code motion passes, to verify
798 that it is safe to insert an insn without worrying about clobbering
799 maybe live hard regs. */
801 bool
802 can_assign_to_reg_without_clobbers_p (rtx x)
804 int num_clobbers = 0;
805 int icode;
807 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
808 if (general_operand (x, GET_MODE (x)))
809 return 1;
810 else if (GET_MODE (x) == VOIDmode)
811 return 0;
813 /* Otherwise, check if we can make a valid insn from it. First initialize
814 our test insn if we haven't already. */
815 if (test_insn == 0)
817 test_insn
818 = make_insn_raw (gen_rtx_SET (VOIDmode,
819 gen_rtx_REG (word_mode,
820 FIRST_PSEUDO_REGISTER * 2),
821 const0_rtx));
822 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
825 /* Now make an insn like the one we would make when GCSE'ing and see if
826 valid. */
827 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
828 SET_SRC (PATTERN (test_insn)) = x;
830 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
831 if (icode < 0)
832 return false;
834 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
835 return false;
837 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
838 return false;
840 return true;
843 /* Return nonzero if the operands of expression X are unchanged from the
844 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
845 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
847 static int
848 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
850 int i, j;
851 enum rtx_code code;
852 const char *fmt;
854 if (x == 0)
855 return 1;
857 code = GET_CODE (x);
858 switch (code)
860 case REG:
862 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
864 if (info->last_bb != current_bb)
865 return 1;
866 if (avail_p)
867 return info->last_set < DF_INSN_LUID (insn);
868 else
869 return info->first_set >= DF_INSN_LUID (insn);
872 case MEM:
873 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
874 x, avail_p))
875 return 0;
876 else
877 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
879 case PRE_DEC:
880 case PRE_INC:
881 case POST_DEC:
882 case POST_INC:
883 case PRE_MODIFY:
884 case POST_MODIFY:
885 return 0;
887 case PC:
888 case CC0: /*FIXME*/
889 case CONST:
890 case CONST_INT:
891 case CONST_DOUBLE:
892 case CONST_FIXED:
893 case CONST_VECTOR:
894 case SYMBOL_REF:
895 case LABEL_REF:
896 case ADDR_VEC:
897 case ADDR_DIFF_VEC:
898 return 1;
900 default:
901 break;
904 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
906 if (fmt[i] == 'e')
908 /* If we are about to do the last recursive call needed at this
909 level, change it into iteration. This function is called enough
910 to be worth it. */
911 if (i == 0)
912 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
914 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
915 return 0;
917 else if (fmt[i] == 'E')
918 for (j = 0; j < XVECLEN (x, i); j++)
919 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
920 return 0;
923 return 1;
926 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
928 struct mem_conflict_info
930 /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
931 see if a memory store conflicts with this memory load. */
932 const_rtx mem;
934 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
935 references. */
936 bool conflict;
939 /* DEST is the output of an instruction. If it is a memory reference and
940 possibly conflicts with the load found in DATA, then communicate this
941 information back through DATA. */
943 static void
944 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
945 void *data)
947 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
949 while (GET_CODE (dest) == SUBREG
950 || GET_CODE (dest) == ZERO_EXTRACT
951 || GET_CODE (dest) == STRICT_LOW_PART)
952 dest = XEXP (dest, 0);
954 /* If DEST is not a MEM, then it will not conflict with the load. Note
955 that function calls are assumed to clobber memory, but are handled
956 elsewhere. */
957 if (! MEM_P (dest))
958 return;
960 /* If we are setting a MEM in our list of specially recognized MEMs,
961 don't mark as killed this time. */
962 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
964 if (!find_rtx_in_ldst (dest))
965 mci->conflict = true;
966 return;
969 if (true_dependence (dest, GET_MODE (dest), mci->mem))
970 mci->conflict = true;
973 /* Return nonzero if the expression in X (a memory reference) is killed
974 in block BB before or after the insn with the LUID in UID_LIMIT.
975 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
976 before UID_LIMIT.
978 To check the entire block, set UID_LIMIT to max_uid + 1 and
979 AVAIL_P to 0. */
981 static int
982 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
983 int avail_p)
985 VEC (rtx,heap) *list = modify_mem_list[bb->index];
986 rtx setter;
987 unsigned ix;
989 /* If this is a readonly then we aren't going to be changing it. */
990 if (MEM_READONLY_P (x))
991 return 0;
993 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
995 struct mem_conflict_info mci;
997 /* Ignore entries in the list that do not apply. */
998 if ((avail_p
999 && DF_INSN_LUID (setter) < uid_limit)
1000 || (! avail_p
1001 && DF_INSN_LUID (setter) > uid_limit))
1002 continue;
1004 /* If SETTER is a call everything is clobbered. Note that calls
1005 to pure functions are never put on the list, so we need not
1006 worry about them. */
1007 if (CALL_P (setter))
1008 return 1;
1010 /* SETTER must be an INSN of some kind that sets memory. Call
1011 note_stores to examine each hunk of memory that is modified. */
1012 mci.mem = x;
1013 mci.conflict = false;
1014 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1015 if (mci.conflict)
1016 return 1;
1018 return 0;
1021 /* Return nonzero if the operands of expression X are unchanged from
1022 the start of INSN's basic block up to but not including INSN. */
1024 static int
1025 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1027 return oprs_unchanged_p (x, insn, 0);
1030 /* Return nonzero if the operands of expression X are unchanged from
1031 INSN to the end of INSN's basic block. */
1033 static int
1034 oprs_available_p (const_rtx x, const_rtx insn)
1036 return oprs_unchanged_p (x, insn, 1);
1039 /* Hash expression X.
1041 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1042 indicating if a volatile operand is found or if the expression contains
1043 something we don't want to insert in the table. HASH_TABLE_SIZE is
1044 the current size of the hash table to be probed. */
1046 static unsigned int
1047 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1048 int hash_table_size)
1050 unsigned int hash;
1052 *do_not_record_p = 0;
1054 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1055 return hash % hash_table_size;
1058 /* Return nonzero if exp1 is equivalent to exp2. */
1060 static int
1061 expr_equiv_p (const_rtx x, const_rtx y)
1063 return exp_equiv_p (x, y, 0, true);
1066 /* Insert expression X in INSN in the hash TABLE.
1067 If it is already present, record it as the last occurrence in INSN's
1068 basic block.
1070 MODE is the mode of the value X is being stored into.
1071 It is only used if X is a CONST_INT.
1073 ANTIC_P is nonzero if X is an anticipatable expression.
1074 AVAIL_P is nonzero if X is an available expression.
1076 MAX_DISTANCE is the maximum distance in instructions this expression can
1077 be moved. */
1079 static void
1080 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1081 int avail_p, int max_distance, struct hash_table_d *table)
1083 int found, do_not_record_p;
1084 unsigned int hash;
1085 struct expr *cur_expr, *last_expr = NULL;
1086 struct occr *antic_occr, *avail_occr;
1088 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1090 /* Do not insert expression in table if it contains volatile operands,
1091 or if hash_expr determines the expression is something we don't want
1092 to or can't handle. */
1093 if (do_not_record_p)
1094 return;
1096 cur_expr = table->table[hash];
1097 found = 0;
1099 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1101 /* If the expression isn't found, save a pointer to the end of
1102 the list. */
1103 last_expr = cur_expr;
1104 cur_expr = cur_expr->next_same_hash;
1107 if (! found)
1109 cur_expr = GOBNEW (struct expr);
1110 bytes_used += sizeof (struct expr);
1111 if (table->table[hash] == NULL)
1112 /* This is the first pattern that hashed to this index. */
1113 table->table[hash] = cur_expr;
1114 else
1115 /* Add EXPR to end of this hash chain. */
1116 last_expr->next_same_hash = cur_expr;
1118 /* Set the fields of the expr element. */
1119 cur_expr->expr = x;
1120 cur_expr->bitmap_index = table->n_elems++;
1121 cur_expr->next_same_hash = NULL;
1122 cur_expr->antic_occr = NULL;
1123 cur_expr->avail_occr = NULL;
1124 gcc_assert (max_distance >= 0);
1125 cur_expr->max_distance = max_distance;
1127 else
1128 gcc_assert (cur_expr->max_distance == max_distance);
1130 /* Now record the occurrence(s). */
1131 if (antic_p)
1133 antic_occr = cur_expr->antic_occr;
1135 if (antic_occr
1136 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1137 antic_occr = NULL;
1139 if (antic_occr)
1140 /* Found another instance of the expression in the same basic block.
1141 Prefer the currently recorded one. We want the first one in the
1142 block and the block is scanned from start to end. */
1143 ; /* nothing to do */
1144 else
1146 /* First occurrence of this expression in this basic block. */
1147 antic_occr = GOBNEW (struct occr);
1148 bytes_used += sizeof (struct occr);
1149 antic_occr->insn = insn;
1150 antic_occr->next = cur_expr->antic_occr;
1151 antic_occr->deleted_p = 0;
1152 cur_expr->antic_occr = antic_occr;
1156 if (avail_p)
1158 avail_occr = cur_expr->avail_occr;
1160 if (avail_occr
1161 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1163 /* Found another instance of the expression in the same basic block.
1164 Prefer this occurrence to the currently recorded one. We want
1165 the last one in the block and the block is scanned from start
1166 to end. */
1167 avail_occr->insn = insn;
1169 else
1171 /* First occurrence of this expression in this basic block. */
1172 avail_occr = GOBNEW (struct occr);
1173 bytes_used += sizeof (struct occr);
1174 avail_occr->insn = insn;
1175 avail_occr->next = cur_expr->avail_occr;
1176 avail_occr->deleted_p = 0;
1177 cur_expr->avail_occr = avail_occr;
1182 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1184 static void
1185 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1187 rtx src = SET_SRC (set);
1188 rtx dest = SET_DEST (set);
1189 rtx note;
1191 if (GET_CODE (src) == CALL)
1192 hash_scan_call (src, insn, table);
1194 else if (REG_P (dest))
1196 unsigned int regno = REGNO (dest);
1197 int max_distance = 0;
1199 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1201 This allows us to do a single GCSE pass and still eliminate
1202 redundant constants, addresses or other expressions that are
1203 constructed with multiple instructions.
1205 However, keep the original SRC if INSN is a simple reg-reg move.
1206 In this case, there will almost always be a REG_EQUAL note on the
1207 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1208 for INSN, we miss copy propagation opportunities and we perform the
1209 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1210 do more than one PRE GCSE pass.
1212 Note that this does not impede profitable constant propagations. We
1213 "look through" reg-reg sets in lookup_avail_set. */
1214 note = find_reg_equal_equiv_note (insn);
1215 if (note != 0
1216 && REG_NOTE_KIND (note) == REG_EQUAL
1217 && !REG_P (src)
1218 && want_to_gcse_p (XEXP (note, 0), NULL))
1219 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1221 /* Only record sets of pseudo-regs in the hash table. */
1222 if (regno >= FIRST_PSEUDO_REGISTER
1223 /* Don't GCSE something if we can't do a reg/reg copy. */
1224 && can_copy_p (GET_MODE (dest))
1225 /* GCSE commonly inserts instruction after the insn. We can't
1226 do that easily for EH edges so disable GCSE on these for now. */
1227 /* ??? We can now easily create new EH landing pads at the
1228 gimple level, for splitting edges; there's no reason we
1229 can't do the same thing at the rtl level. */
1230 && !can_throw_internal (insn)
1231 /* Is SET_SRC something we want to gcse? */
1232 && want_to_gcse_p (src, &max_distance)
1233 /* Don't CSE a nop. */
1234 && ! set_noop_p (set)
1235 /* Don't GCSE if it has attached REG_EQUIV note.
1236 At this point this only function parameters should have
1237 REG_EQUIV notes and if the argument slot is used somewhere
1238 explicitly, it means address of parameter has been taken,
1239 so we should not extend the lifetime of the pseudo. */
1240 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1242 /* An expression is not anticipatable if its operands are
1243 modified before this insn or if this is not the only SET in
1244 this insn. The latter condition does not have to mean that
1245 SRC itself is not anticipatable, but we just will not be
1246 able to handle code motion of insns with multiple sets. */
1247 int antic_p = oprs_anticipatable_p (src, insn)
1248 && !multiple_sets (insn);
1249 /* An expression is not available if its operands are
1250 subsequently modified, including this insn. It's also not
1251 available if this is a branch, because we can't insert
1252 a set after the branch. */
1253 int avail_p = (oprs_available_p (src, insn)
1254 && ! JUMP_P (insn));
1256 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1257 max_distance, table);
1260 /* In case of store we want to consider the memory value as available in
1261 the REG stored in that memory. This makes it possible to remove
1262 redundant loads from due to stores to the same location. */
1263 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1265 unsigned int regno = REGNO (src);
1266 int max_distance = 0;
1268 /* Only record sets of pseudo-regs in the hash table. */
1269 if (regno >= FIRST_PSEUDO_REGISTER
1270 /* Don't GCSE something if we can't do a reg/reg copy. */
1271 && can_copy_p (GET_MODE (src))
1272 /* GCSE commonly inserts instruction after the insn. We can't
1273 do that easily for EH edges so disable GCSE on these for now. */
1274 && !can_throw_internal (insn)
1275 /* Is SET_DEST something we want to gcse? */
1276 && want_to_gcse_p (dest, &max_distance)
1277 /* Don't CSE a nop. */
1278 && ! set_noop_p (set)
1279 /* Don't GCSE if it has attached REG_EQUIV note.
1280 At this point this only function parameters should have
1281 REG_EQUIV notes and if the argument slot is used somewhere
1282 explicitly, it means address of parameter has been taken,
1283 so we should not extend the lifetime of the pseudo. */
1284 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1285 || ! MEM_P (XEXP (note, 0))))
1287 /* Stores are never anticipatable. */
1288 int antic_p = 0;
1289 /* An expression is not available if its operands are
1290 subsequently modified, including this insn. It's also not
1291 available if this is a branch, because we can't insert
1292 a set after the branch. */
1293 int avail_p = oprs_available_p (dest, insn)
1294 && ! JUMP_P (insn);
1296 /* Record the memory expression (DEST) in the hash table. */
1297 insert_expr_in_table (dest, GET_MODE (dest), insn,
1298 antic_p, avail_p, max_distance, table);
1303 static void
1304 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1305 struct hash_table_d *table ATTRIBUTE_UNUSED)
1307 /* Currently nothing to do. */
1310 static void
1311 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1312 struct hash_table_d *table ATTRIBUTE_UNUSED)
1314 /* Currently nothing to do. */
1317 /* Process INSN and add hash table entries as appropriate. */
1319 static void
1320 hash_scan_insn (rtx insn, struct hash_table_d *table)
1322 rtx pat = PATTERN (insn);
1323 int i;
1325 /* Pick out the sets of INSN and for other forms of instructions record
1326 what's been modified. */
1328 if (GET_CODE (pat) == SET)
1329 hash_scan_set (pat, insn, table);
1331 else if (GET_CODE (pat) == CLOBBER)
1332 hash_scan_clobber (pat, insn, table);
1334 else if (GET_CODE (pat) == CALL)
1335 hash_scan_call (pat, insn, table);
1337 else if (GET_CODE (pat) == PARALLEL)
1338 for (i = 0; i < XVECLEN (pat, 0); i++)
1340 rtx x = XVECEXP (pat, 0, i);
1342 if (GET_CODE (x) == SET)
1343 hash_scan_set (x, insn, table);
1344 else if (GET_CODE (x) == CLOBBER)
1345 hash_scan_clobber (x, insn, table);
1346 else if (GET_CODE (x) == CALL)
1347 hash_scan_call (x, insn, table);
1351 /* Dump the hash table TABLE to file FILE under the name NAME. */
1353 static void
1354 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1356 int i;
1357 /* Flattened out table, so it's printed in proper order. */
1358 struct expr **flat_table;
1359 unsigned int *hash_val;
1360 struct expr *expr;
1362 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1363 hash_val = XNEWVEC (unsigned int, table->n_elems);
1365 for (i = 0; i < (int) table->size; i++)
1366 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1368 flat_table[expr->bitmap_index] = expr;
1369 hash_val[expr->bitmap_index] = i;
1372 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1373 name, table->size, table->n_elems);
1375 for (i = 0; i < (int) table->n_elems; i++)
1376 if (flat_table[i] != 0)
1378 expr = flat_table[i];
1379 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1380 expr->bitmap_index, hash_val[i], expr->max_distance);
1381 print_rtl (file, expr->expr);
1382 fprintf (file, "\n");
1385 fprintf (file, "\n");
1387 free (flat_table);
1388 free (hash_val);
1391 /* Record register first/last/block set information for REGNO in INSN.
1393 first_set records the first place in the block where the register
1394 is set and is used to compute "anticipatability".
1396 last_set records the last place in the block where the register
1397 is set and is used to compute "availability".
1399 last_bb records the block for which first_set and last_set are
1400 valid, as a quick test to invalidate them. */
1402 static void
1403 record_last_reg_set_info (rtx insn, int regno)
1405 struct reg_avail_info *info = &reg_avail_info[regno];
1406 int luid = DF_INSN_LUID (insn);
1408 info->last_set = luid;
1409 if (info->last_bb != current_bb)
1411 info->last_bb = current_bb;
1412 info->first_set = luid;
1416 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1417 Note we store a pair of elements in the list, so they have to be
1418 taken off pairwise. */
1420 static void
1421 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1422 void * v_insn)
1424 rtx dest_addr, insn;
1425 int bb;
1426 modify_pair *pair;
1428 while (GET_CODE (dest) == SUBREG
1429 || GET_CODE (dest) == ZERO_EXTRACT
1430 || GET_CODE (dest) == STRICT_LOW_PART)
1431 dest = XEXP (dest, 0);
1433 /* If DEST is not a MEM, then it will not conflict with a load. Note
1434 that function calls are assumed to clobber memory, but are handled
1435 elsewhere. */
1437 if (! MEM_P (dest))
1438 return;
1440 dest_addr = get_addr (XEXP (dest, 0));
1441 dest_addr = canon_rtx (dest_addr);
1442 insn = (rtx) v_insn;
1443 bb = BLOCK_FOR_INSN (insn)->index;
1445 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1446 pair->dest = dest;
1447 pair->dest_addr = dest_addr;
1450 /* Record memory modification information for INSN. We do not actually care
1451 about the memory location(s) that are set, or even how they are set (consider
1452 a CALL_INSN). We merely need to record which insns modify memory. */
1454 static void
1455 record_last_mem_set_info (rtx insn)
1457 int bb = BLOCK_FOR_INSN (insn)->index;
1459 /* load_killed_in_block_p will handle the case of calls clobbering
1460 everything. */
1461 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1462 bitmap_set_bit (modify_mem_list_set, bb);
1464 if (CALL_P (insn))
1465 bitmap_set_bit (blocks_with_calls, bb);
1466 else
1467 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1470 /* Called from compute_hash_table via note_stores to handle one
1471 SET or CLOBBER in an insn. DATA is really the instruction in which
1472 the SET is taking place. */
1474 static void
1475 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1477 rtx last_set_insn = (rtx) data;
1479 if (GET_CODE (dest) == SUBREG)
1480 dest = SUBREG_REG (dest);
1482 if (REG_P (dest))
1483 record_last_reg_set_info (last_set_insn, REGNO (dest));
1484 else if (MEM_P (dest)
1485 /* Ignore pushes, they clobber nothing. */
1486 && ! push_operand (dest, GET_MODE (dest)))
1487 record_last_mem_set_info (last_set_insn);
1490 /* Top level function to create an expression hash table.
1492 Expression entries are placed in the hash table if
1493 - they are of the form (set (pseudo-reg) src),
1494 - src is something we want to perform GCSE on,
1495 - none of the operands are subsequently modified in the block
1497 Currently src must be a pseudo-reg or a const_int.
1499 TABLE is the table computed. */
1501 static void
1502 compute_hash_table_work (struct hash_table_d *table)
1504 int i;
1506 /* re-Cache any INSN_LIST nodes we have allocated. */
1507 clear_modify_mem_tables ();
1508 /* Some working arrays used to track first and last set in each block. */
1509 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1511 for (i = 0; i < max_reg_num (); ++i)
1512 reg_avail_info[i].last_bb = NULL;
1514 FOR_EACH_BB (current_bb)
1516 rtx insn;
1517 unsigned int regno;
1519 /* First pass over the instructions records information used to
1520 determine when registers and memory are first and last set. */
1521 FOR_BB_INSNS (current_bb, insn)
1523 if (!NONDEBUG_INSN_P (insn))
1524 continue;
1526 if (CALL_P (insn))
1528 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1529 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1530 record_last_reg_set_info (insn, regno);
1532 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1533 record_last_mem_set_info (insn);
1536 note_stores (PATTERN (insn), record_last_set_info, insn);
1539 /* The next pass builds the hash table. */
1540 FOR_BB_INSNS (current_bb, insn)
1541 if (NONDEBUG_INSN_P (insn))
1542 hash_scan_insn (insn, table);
1545 free (reg_avail_info);
1546 reg_avail_info = NULL;
1549 /* Allocate space for the set/expr hash TABLE.
1550 It is used to determine the number of buckets to use. */
1552 static void
1553 alloc_hash_table (struct hash_table_d *table)
1555 int n;
1557 n = get_max_insn_count ();
1559 table->size = n / 4;
1560 if (table->size < 11)
1561 table->size = 11;
1563 /* Attempt to maintain efficient use of hash table.
1564 Making it an odd number is simplest for now.
1565 ??? Later take some measurements. */
1566 table->size |= 1;
1567 n = table->size * sizeof (struct expr *);
1568 table->table = GNEWVAR (struct expr *, n);
1571 /* Free things allocated by alloc_hash_table. */
1573 static void
1574 free_hash_table (struct hash_table_d *table)
1576 free (table->table);
1579 /* Compute the expression hash table TABLE. */
1581 static void
1582 compute_hash_table (struct hash_table_d *table)
1584 /* Initialize count of number of entries in hash table. */
1585 table->n_elems = 0;
1586 memset (table->table, 0, table->size * sizeof (struct expr *));
1588 compute_hash_table_work (table);
1591 /* Expression tracking support. */
1593 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1594 static void
1595 clear_modify_mem_tables (void)
1597 unsigned i;
1598 bitmap_iterator bi;
1600 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1602 VEC_free (rtx, heap, modify_mem_list[i]);
1603 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1605 bitmap_clear (modify_mem_list_set);
1606 bitmap_clear (blocks_with_calls);
1609 /* Release memory used by modify_mem_list_set. */
1611 static void
1612 free_modify_mem_tables (void)
1614 clear_modify_mem_tables ();
1615 free (modify_mem_list);
1616 free (canon_modify_mem_list);
1617 modify_mem_list = 0;
1618 canon_modify_mem_list = 0;
1621 /* For each block, compute whether X is transparent. X is either an
1622 expression or an assignment [though we don't care which, for this context
1623 an assignment is treated as an expression]. For each block where an
1624 element of X is modified, reset the INDX bit in BMAP. */
1626 static void
1627 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1629 int i, j;
1630 enum rtx_code code;
1631 const char *fmt;
1633 /* repeat is used to turn tail-recursion into iteration since GCC
1634 can't do it when there's no return value. */
1635 repeat:
1637 if (x == 0)
1638 return;
1640 code = GET_CODE (x);
1641 switch (code)
1643 case REG:
1645 df_ref def;
1646 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1647 def;
1648 def = DF_REF_NEXT_REG (def))
1649 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1652 return;
1654 case MEM:
1655 if (! MEM_READONLY_P (x))
1657 bitmap_iterator bi;
1658 unsigned bb_index;
1660 /* First handle all the blocks with calls. We don't need to
1661 do any list walking for them. */
1662 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1664 RESET_BIT (bmap[bb_index], indx);
1667 /* Now iterate over the blocks which have memory modifications
1668 but which do not have any calls. */
1669 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1670 blocks_with_calls,
1671 0, bb_index, bi)
1673 VEC (modify_pair,heap) *list
1674 = canon_modify_mem_list[bb_index];
1675 modify_pair *pair;
1676 unsigned ix;
1678 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1680 rtx dest = pair->dest;
1681 rtx dest_addr = pair->dest_addr;
1683 if (canon_true_dependence (dest, GET_MODE (dest),
1684 dest_addr, x, NULL_RTX))
1685 RESET_BIT (bmap[bb_index], indx);
1690 x = XEXP (x, 0);
1691 goto repeat;
1693 case PC:
1694 case CC0: /*FIXME*/
1695 case CONST:
1696 case CONST_INT:
1697 case CONST_DOUBLE:
1698 case CONST_FIXED:
1699 case CONST_VECTOR:
1700 case SYMBOL_REF:
1701 case LABEL_REF:
1702 case ADDR_VEC:
1703 case ADDR_DIFF_VEC:
1704 return;
1706 default:
1707 break;
1710 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1712 if (fmt[i] == 'e')
1714 /* If we are about to do the last recursive call
1715 needed at this level, change it into iteration.
1716 This function is called enough to be worth it. */
1717 if (i == 0)
1719 x = XEXP (x, i);
1720 goto repeat;
1723 compute_transp (XEXP (x, i), indx, bmap);
1725 else if (fmt[i] == 'E')
1726 for (j = 0; j < XVECLEN (x, i); j++)
1727 compute_transp (XVECEXP (x, i, j), indx, bmap);
1731 /* Compute PRE+LCM working variables. */
1733 /* Local properties of expressions. */
1735 /* Nonzero for expressions that are transparent in the block. */
1736 static sbitmap *transp;
1738 /* Nonzero for expressions that are computed (available) in the block. */
1739 static sbitmap *comp;
1741 /* Nonzero for expressions that are locally anticipatable in the block. */
1742 static sbitmap *antloc;
1744 /* Nonzero for expressions where this block is an optimal computation
1745 point. */
1746 static sbitmap *pre_optimal;
1748 /* Nonzero for expressions which are redundant in a particular block. */
1749 static sbitmap *pre_redundant;
1751 /* Nonzero for expressions which should be inserted on a specific edge. */
1752 static sbitmap *pre_insert_map;
1754 /* Nonzero for expressions which should be deleted in a specific block. */
1755 static sbitmap *pre_delete_map;
1757 /* Allocate vars used for PRE analysis. */
1759 static void
1760 alloc_pre_mem (int n_blocks, int n_exprs)
1762 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1763 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1766 pre_optimal = NULL;
1767 pre_redundant = NULL;
1768 pre_insert_map = NULL;
1769 pre_delete_map = NULL;
1770 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1772 /* pre_insert and pre_delete are allocated later. */
1775 /* Free vars used for PRE analysis. */
1777 static void
1778 free_pre_mem (void)
1780 sbitmap_vector_free (transp);
1781 sbitmap_vector_free (comp);
1783 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1785 if (pre_optimal)
1786 sbitmap_vector_free (pre_optimal);
1787 if (pre_redundant)
1788 sbitmap_vector_free (pre_redundant);
1789 if (pre_insert_map)
1790 sbitmap_vector_free (pre_insert_map);
1791 if (pre_delete_map)
1792 sbitmap_vector_free (pre_delete_map);
1794 transp = comp = NULL;
1795 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1798 /* Remove certain expressions from anticipatable and transparent
1799 sets of basic blocks that have incoming abnormal edge.
1800 For PRE remove potentially trapping expressions to avoid placing
1801 them on abnormal edges. For hoisting remove memory references that
1802 can be clobbered by calls. */
1804 static void
1805 prune_expressions (bool pre_p)
1807 sbitmap prune_exprs;
1808 struct expr *expr;
1809 unsigned int ui;
1810 basic_block bb;
1812 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1813 sbitmap_zero (prune_exprs);
1814 for (ui = 0; ui < expr_hash_table.size; ui++)
1816 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1818 /* Note potentially trapping expressions. */
1819 if (may_trap_p (expr->expr))
1821 SET_BIT (prune_exprs, expr->bitmap_index);
1822 continue;
1825 if (!pre_p && MEM_P (expr->expr))
1826 /* Note memory references that can be clobbered by a call.
1827 We do not split abnormal edges in hoisting, so would
1828 a memory reference get hoisted along an abnormal edge,
1829 it would be placed /before/ the call. Therefore, only
1830 constant memory references can be hoisted along abnormal
1831 edges. */
1833 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1834 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1835 continue;
1837 if (MEM_READONLY_P (expr->expr)
1838 && !MEM_VOLATILE_P (expr->expr)
1839 && MEM_NOTRAP_P (expr->expr))
1840 /* Constant memory reference, e.g., a PIC address. */
1841 continue;
1843 /* ??? Optimally, we would use interprocedural alias
1844 analysis to determine if this mem is actually killed
1845 by this call. */
1847 SET_BIT (prune_exprs, expr->bitmap_index);
1852 FOR_EACH_BB (bb)
1854 edge e;
1855 edge_iterator ei;
1857 /* If the current block is the destination of an abnormal edge, we
1858 kill all trapping (for PRE) and memory (for hoist) expressions
1859 because we won't be able to properly place the instruction on
1860 the edge. So make them neither anticipatable nor transparent.
1861 This is fairly conservative.
1863 ??? For hoisting it may be necessary to check for set-and-jump
1864 instructions here, not just for abnormal edges. The general problem
1865 is that when an expression cannot not be placed right at the end of
1866 a basic block we should account for any side-effects of a subsequent
1867 jump instructions that could clobber the expression. It would
1868 be best to implement this check along the lines of
1869 hoist_expr_reaches_here_p where the target block is already known
1870 and, hence, there's no need to conservatively prune expressions on
1871 "intermediate" set-and-jump instructions. */
1872 FOR_EACH_EDGE (e, ei, bb->preds)
1873 if ((e->flags & EDGE_ABNORMAL)
1874 && (pre_p || CALL_P (BB_END (e->src))))
1876 sbitmap_difference (antloc[bb->index],
1877 antloc[bb->index], prune_exprs);
1878 sbitmap_difference (transp[bb->index],
1879 transp[bb->index], prune_exprs);
1880 break;
1884 sbitmap_free (prune_exprs);
1887 /* It may be necessary to insert a large number of insns on edges to
1888 make the existing occurrences of expressions fully redundant. This
1889 routine examines the set of insertions and deletions and if the ratio
1890 of insertions to deletions is too high for a particular expression, then
1891 the expression is removed from the insertion/deletion sets.
1893 N_ELEMS is the number of elements in the hash table. */
1895 static void
1896 prune_insertions_deletions (int n_elems)
1898 sbitmap_iterator sbi;
1899 sbitmap prune_exprs;
1901 /* We always use I to iterate over blocks/edges and J to iterate over
1902 expressions. */
1903 unsigned int i, j;
1905 /* Counts for the number of times an expression needs to be inserted and
1906 number of times an expression can be removed as a result. */
1907 int *insertions = GCNEWVEC (int, n_elems);
1908 int *deletions = GCNEWVEC (int, n_elems);
1910 /* Set of expressions which require too many insertions relative to
1911 the number of deletions achieved. We will prune these out of the
1912 insertion/deletion sets. */
1913 prune_exprs = sbitmap_alloc (n_elems);
1914 sbitmap_zero (prune_exprs);
1916 /* Iterate over the edges counting the number of times each expression
1917 needs to be inserted. */
1918 for (i = 0; i < (unsigned) n_edges; i++)
1920 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1921 insertions[j]++;
1924 /* Similarly for deletions, but those occur in blocks rather than on
1925 edges. */
1926 for (i = 0; i < (unsigned) last_basic_block; i++)
1928 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1929 deletions[j]++;
1932 /* Now that we have accurate counts, iterate over the elements in the
1933 hash table and see if any need too many insertions relative to the
1934 number of evaluations that can be removed. If so, mark them in
1935 PRUNE_EXPRS. */
1936 for (j = 0; j < (unsigned) n_elems; j++)
1937 if (deletions[j]
1938 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1939 SET_BIT (prune_exprs, j);
1941 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1942 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1944 for (i = 0; i < (unsigned) n_edges; i++)
1945 RESET_BIT (pre_insert_map[i], j);
1947 for (i = 0; i < (unsigned) last_basic_block; i++)
1948 RESET_BIT (pre_delete_map[i], j);
1951 sbitmap_free (prune_exprs);
1952 free (insertions);
1953 free (deletions);
1956 /* Top level routine to do the dataflow analysis needed by PRE. */
1958 static struct edge_list *
1959 compute_pre_data (void)
1961 struct edge_list *edge_list;
1962 basic_block bb;
1964 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1965 prune_expressions (true);
1966 sbitmap_vector_zero (ae_kill, last_basic_block);
1968 /* Compute ae_kill for each basic block using:
1970 ~(TRANSP | COMP)
1973 FOR_EACH_BB (bb)
1975 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1976 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1979 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1980 ae_kill, &pre_insert_map, &pre_delete_map);
1981 sbitmap_vector_free (antloc);
1982 antloc = NULL;
1983 sbitmap_vector_free (ae_kill);
1984 ae_kill = NULL;
1986 prune_insertions_deletions (expr_hash_table.n_elems);
1988 return edge_list;
1991 /* PRE utilities */
1993 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1994 block BB.
1996 VISITED is a pointer to a working buffer for tracking which BB's have
1997 been visited. It is NULL for the top-level call.
1999 We treat reaching expressions that go through blocks containing the same
2000 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2001 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2002 2 as not reaching. The intent is to improve the probability of finding
2003 only one reaching expression and to reduce register lifetimes by picking
2004 the closest such expression. */
2006 static int
2007 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2008 basic_block bb, char *visited)
2010 edge pred;
2011 edge_iterator ei;
2013 FOR_EACH_EDGE (pred, ei, bb->preds)
2015 basic_block pred_bb = pred->src;
2017 if (pred->src == ENTRY_BLOCK_PTR
2018 /* Has predecessor has already been visited? */
2019 || visited[pred_bb->index])
2020 ;/* Nothing to do. */
2022 /* Does this predecessor generate this expression? */
2023 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2025 /* Is this the occurrence we're looking for?
2026 Note that there's only one generating occurrence per block
2027 so we just need to check the block number. */
2028 if (occr_bb == pred_bb)
2029 return 1;
2031 visited[pred_bb->index] = 1;
2033 /* Ignore this predecessor if it kills the expression. */
2034 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2035 visited[pred_bb->index] = 1;
2037 /* Neither gen nor kill. */
2038 else
2040 visited[pred_bb->index] = 1;
2041 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2042 return 1;
2046 /* All paths have been checked. */
2047 return 0;
2050 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2051 memory allocated for that function is returned. */
2053 static int
2054 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2056 int rval;
2057 char *visited = XCNEWVEC (char, last_basic_block);
2059 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2061 free (visited);
2062 return rval;
2065 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2067 static rtx
2068 process_insert_insn (struct expr *expr)
2070 rtx reg = expr->reaching_reg;
2071 /* Copy the expression to make sure we don't have any sharing issues. */
2072 rtx exp = copy_rtx (expr->expr);
2073 rtx pat;
2075 start_sequence ();
2077 /* If the expression is something that's an operand, like a constant,
2078 just copy it to a register. */
2079 if (general_operand (exp, GET_MODE (reg)))
2080 emit_move_insn (reg, exp);
2082 /* Otherwise, make a new insn to compute this expression and make sure the
2083 insn will be recognized (this also adds any needed CLOBBERs). */
2084 else
2086 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2088 if (insn_invalid_p (insn, false))
2089 gcc_unreachable ();
2092 pat = get_insns ();
2093 end_sequence ();
2095 return pat;
2098 /* Add EXPR to the end of basic block BB.
2100 This is used by both the PRE and code hoisting. */
2102 static void
2103 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2105 rtx insn = BB_END (bb);
2106 rtx new_insn;
2107 rtx reg = expr->reaching_reg;
2108 int regno = REGNO (reg);
2109 rtx pat, pat_end;
2111 pat = process_insert_insn (expr);
2112 gcc_assert (pat && INSN_P (pat));
2114 pat_end = pat;
2115 while (NEXT_INSN (pat_end) != NULL_RTX)
2116 pat_end = NEXT_INSN (pat_end);
2118 /* If the last insn is a jump, insert EXPR in front [taking care to
2119 handle cc0, etc. properly]. Similarly we need to care trapping
2120 instructions in presence of non-call exceptions. */
2122 if (JUMP_P (insn)
2123 || (NONJUMP_INSN_P (insn)
2124 && (!single_succ_p (bb)
2125 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2127 #ifdef HAVE_cc0
2128 rtx note;
2129 #endif
2131 /* If this is a jump table, then we can't insert stuff here. Since
2132 we know the previous real insn must be the tablejump, we insert
2133 the new instruction just before the tablejump. */
2134 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2135 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2136 insn = prev_active_insn (insn);
2138 #ifdef HAVE_cc0
2139 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2140 if cc0 isn't set. */
2141 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2142 if (note)
2143 insn = XEXP (note, 0);
2144 else
2146 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2147 if (maybe_cc0_setter
2148 && INSN_P (maybe_cc0_setter)
2149 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2150 insn = maybe_cc0_setter;
2152 #endif
2153 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2154 new_insn = emit_insn_before_noloc (pat, insn, bb);
2157 /* Likewise if the last insn is a call, as will happen in the presence
2158 of exception handling. */
2159 else if (CALL_P (insn)
2160 && (!single_succ_p (bb)
2161 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2163 /* Keeping in mind targets with small register classes and parameters
2164 in registers, we search backward and place the instructions before
2165 the first parameter is loaded. Do this for everyone for consistency
2166 and a presumption that we'll get better code elsewhere as well. */
2168 /* Since different machines initialize their parameter registers
2169 in different orders, assume nothing. Collect the set of all
2170 parameter registers. */
2171 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2173 /* If we found all the parameter loads, then we want to insert
2174 before the first parameter load.
2176 If we did not find all the parameter loads, then we might have
2177 stopped on the head of the block, which could be a CODE_LABEL.
2178 If we inserted before the CODE_LABEL, then we would be putting
2179 the insn in the wrong basic block. In that case, put the insn
2180 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2181 while (LABEL_P (insn)
2182 || NOTE_INSN_BASIC_BLOCK_P (insn))
2183 insn = NEXT_INSN (insn);
2185 new_insn = emit_insn_before_noloc (pat, insn, bb);
2187 else
2188 new_insn = emit_insn_after_noloc (pat, insn, bb);
2190 while (1)
2192 if (INSN_P (pat))
2193 add_label_notes (PATTERN (pat), new_insn);
2194 if (pat == pat_end)
2195 break;
2196 pat = NEXT_INSN (pat);
2199 gcse_create_count++;
2201 if (dump_file)
2203 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2204 bb->index, INSN_UID (new_insn));
2205 fprintf (dump_file, "copying expression %d to reg %d\n",
2206 expr->bitmap_index, regno);
2210 /* Insert partially redundant expressions on edges in the CFG to make
2211 the expressions fully redundant. */
2213 static int
2214 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2216 int e, i, j, num_edges, set_size, did_insert = 0;
2217 sbitmap *inserted;
2219 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2220 if it reaches any of the deleted expressions. */
2222 set_size = pre_insert_map[0]->size;
2223 num_edges = NUM_EDGES (edge_list);
2224 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2225 sbitmap_vector_zero (inserted, num_edges);
2227 for (e = 0; e < num_edges; e++)
2229 int indx;
2230 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2232 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2234 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2236 for (j = indx;
2237 insert && j < (int) expr_hash_table.n_elems;
2238 j++, insert >>= 1)
2239 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2241 struct expr *expr = index_map[j];
2242 struct occr *occr;
2244 /* Now look at each deleted occurrence of this expression. */
2245 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2247 if (! occr->deleted_p)
2248 continue;
2250 /* Insert this expression on this edge if it would
2251 reach the deleted occurrence in BB. */
2252 if (!TEST_BIT (inserted[e], j))
2254 rtx insn;
2255 edge eg = INDEX_EDGE (edge_list, e);
2257 /* We can't insert anything on an abnormal and
2258 critical edge, so we insert the insn at the end of
2259 the previous block. There are several alternatives
2260 detailed in Morgans book P277 (sec 10.5) for
2261 handling this situation. This one is easiest for
2262 now. */
2264 if (eg->flags & EDGE_ABNORMAL)
2265 insert_insn_end_basic_block (index_map[j], bb);
2266 else
2268 insn = process_insert_insn (index_map[j]);
2269 insert_insn_on_edge (insn, eg);
2272 if (dump_file)
2274 fprintf (dump_file, "PRE: edge (%d,%d), ",
2275 bb->index,
2276 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2277 fprintf (dump_file, "copy expression %d\n",
2278 expr->bitmap_index);
2281 update_ld_motion_stores (expr);
2282 SET_BIT (inserted[e], j);
2283 did_insert = 1;
2284 gcse_create_count++;
2291 sbitmap_vector_free (inserted);
2292 return did_insert;
2295 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2296 Given "old_reg <- expr" (INSN), instead of adding after it
2297 reaching_reg <- old_reg
2298 it's better to do the following:
2299 reaching_reg <- expr
2300 old_reg <- reaching_reg
2301 because this way copy propagation can discover additional PRE
2302 opportunities. But if this fails, we try the old way.
2303 When "expr" is a store, i.e.
2304 given "MEM <- old_reg", instead of adding after it
2305 reaching_reg <- old_reg
2306 it's better to add it before as follows:
2307 reaching_reg <- old_reg
2308 MEM <- reaching_reg. */
2310 static void
2311 pre_insert_copy_insn (struct expr *expr, rtx insn)
2313 rtx reg = expr->reaching_reg;
2314 int regno = REGNO (reg);
2315 int indx = expr->bitmap_index;
2316 rtx pat = PATTERN (insn);
2317 rtx set, first_set, new_insn;
2318 rtx old_reg;
2319 int i;
2321 /* This block matches the logic in hash_scan_insn. */
2322 switch (GET_CODE (pat))
2324 case SET:
2325 set = pat;
2326 break;
2328 case PARALLEL:
2329 /* Search through the parallel looking for the set whose
2330 source was the expression that we're interested in. */
2331 first_set = NULL_RTX;
2332 set = NULL_RTX;
2333 for (i = 0; i < XVECLEN (pat, 0); i++)
2335 rtx x = XVECEXP (pat, 0, i);
2336 if (GET_CODE (x) == SET)
2338 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2339 may not find an equivalent expression, but in this
2340 case the PARALLEL will have a single set. */
2341 if (first_set == NULL_RTX)
2342 first_set = x;
2343 if (expr_equiv_p (SET_SRC (x), expr->expr))
2345 set = x;
2346 break;
2351 gcc_assert (first_set);
2352 if (set == NULL_RTX)
2353 set = first_set;
2354 break;
2356 default:
2357 gcc_unreachable ();
2360 if (REG_P (SET_DEST (set)))
2362 old_reg = SET_DEST (set);
2363 /* Check if we can modify the set destination in the original insn. */
2364 if (validate_change (insn, &SET_DEST (set), reg, 0))
2366 new_insn = gen_move_insn (old_reg, reg);
2367 new_insn = emit_insn_after (new_insn, insn);
2369 else
2371 new_insn = gen_move_insn (reg, old_reg);
2372 new_insn = emit_insn_after (new_insn, insn);
2375 else /* This is possible only in case of a store to memory. */
2377 old_reg = SET_SRC (set);
2378 new_insn = gen_move_insn (reg, old_reg);
2380 /* Check if we can modify the set source in the original insn. */
2381 if (validate_change (insn, &SET_SRC (set), reg, 0))
2382 new_insn = emit_insn_before (new_insn, insn);
2383 else
2384 new_insn = emit_insn_after (new_insn, insn);
2387 gcse_create_count++;
2389 if (dump_file)
2390 fprintf (dump_file,
2391 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2392 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2393 INSN_UID (insn), regno);
2396 /* Copy available expressions that reach the redundant expression
2397 to `reaching_reg'. */
2399 static void
2400 pre_insert_copies (void)
2402 unsigned int i, added_copy;
2403 struct expr *expr;
2404 struct occr *occr;
2405 struct occr *avail;
2407 /* For each available expression in the table, copy the result to
2408 `reaching_reg' if the expression reaches a deleted one.
2410 ??? The current algorithm is rather brute force.
2411 Need to do some profiling. */
2413 for (i = 0; i < expr_hash_table.size; i++)
2414 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2416 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2417 we don't want to insert a copy here because the expression may not
2418 really be redundant. So only insert an insn if the expression was
2419 deleted. This test also avoids further processing if the
2420 expression wasn't deleted anywhere. */
2421 if (expr->reaching_reg == NULL)
2422 continue;
2424 /* Set when we add a copy for that expression. */
2425 added_copy = 0;
2427 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2429 if (! occr->deleted_p)
2430 continue;
2432 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2434 rtx insn = avail->insn;
2436 /* No need to handle this one if handled already. */
2437 if (avail->copied_p)
2438 continue;
2440 /* Don't handle this one if it's a redundant one. */
2441 if (INSN_DELETED_P (insn))
2442 continue;
2444 /* Or if the expression doesn't reach the deleted one. */
2445 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2446 expr,
2447 BLOCK_FOR_INSN (occr->insn)))
2448 continue;
2450 added_copy = 1;
2452 /* Copy the result of avail to reaching_reg. */
2453 pre_insert_copy_insn (expr, insn);
2454 avail->copied_p = 1;
2458 if (added_copy)
2459 update_ld_motion_stores (expr);
2463 /* Emit move from SRC to DEST noting the equivalence with expression computed
2464 in INSN. */
2466 static rtx
2467 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2469 rtx new_rtx;
2470 rtx set = single_set (insn), set2;
2471 rtx note;
2472 rtx eqv;
2474 /* This should never fail since we're creating a reg->reg copy
2475 we've verified to be valid. */
2477 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2479 /* Note the equivalence for local CSE pass. */
2480 set2 = single_set (new_rtx);
2481 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2482 return new_rtx;
2483 if ((note = find_reg_equal_equiv_note (insn)))
2484 eqv = XEXP (note, 0);
2485 else
2486 eqv = SET_SRC (set);
2488 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2490 return new_rtx;
2493 /* Delete redundant computations.
2494 Deletion is done by changing the insn to copy the `reaching_reg' of
2495 the expression into the result of the SET. It is left to later passes
2496 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2498 Return nonzero if a change is made. */
2500 static int
2501 pre_delete (void)
2503 unsigned int i;
2504 int changed;
2505 struct expr *expr;
2506 struct occr *occr;
2508 changed = 0;
2509 for (i = 0; i < expr_hash_table.size; i++)
2510 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2512 int indx = expr->bitmap_index;
2514 /* We only need to search antic_occr since we require ANTLOC != 0. */
2515 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2517 rtx insn = occr->insn;
2518 rtx set;
2519 basic_block bb = BLOCK_FOR_INSN (insn);
2521 /* We only delete insns that have a single_set. */
2522 if (TEST_BIT (pre_delete_map[bb->index], indx)
2523 && (set = single_set (insn)) != 0
2524 && dbg_cnt (pre_insn))
2526 /* Create a pseudo-reg to store the result of reaching
2527 expressions into. Get the mode for the new pseudo from
2528 the mode of the original destination pseudo. */
2529 if (expr->reaching_reg == NULL)
2530 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2532 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2533 delete_insn (insn);
2534 occr->deleted_p = 1;
2535 changed = 1;
2536 gcse_subst_count++;
2538 if (dump_file)
2540 fprintf (dump_file,
2541 "PRE: redundant insn %d (expression %d) in ",
2542 INSN_UID (insn), indx);
2543 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2544 bb->index, REGNO (expr->reaching_reg));
2550 return changed;
2553 /* Perform GCSE optimizations using PRE.
2554 This is called by one_pre_gcse_pass after all the dataflow analysis
2555 has been done.
2557 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2558 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2559 Compiler Design and Implementation.
2561 ??? A new pseudo reg is created to hold the reaching expression. The nice
2562 thing about the classical approach is that it would try to use an existing
2563 reg. If the register can't be adequately optimized [i.e. we introduce
2564 reload problems], one could add a pass here to propagate the new register
2565 through the block.
2567 ??? We don't handle single sets in PARALLELs because we're [currently] not
2568 able to copy the rest of the parallel when we insert copies to create full
2569 redundancies from partial redundancies. However, there's no reason why we
2570 can't handle PARALLELs in the cases where there are no partial
2571 redundancies. */
2573 static int
2574 pre_gcse (struct edge_list *edge_list)
2576 unsigned int i;
2577 int did_insert, changed;
2578 struct expr **index_map;
2579 struct expr *expr;
2581 /* Compute a mapping from expression number (`bitmap_index') to
2582 hash table entry. */
2584 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2585 for (i = 0; i < expr_hash_table.size; i++)
2586 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2587 index_map[expr->bitmap_index] = expr;
2589 /* Delete the redundant insns first so that
2590 - we know what register to use for the new insns and for the other
2591 ones with reaching expressions
2592 - we know which insns are redundant when we go to create copies */
2594 changed = pre_delete ();
2595 did_insert = pre_edge_insert (edge_list, index_map);
2597 /* In other places with reaching expressions, copy the expression to the
2598 specially allocated pseudo-reg that reaches the redundant expr. */
2599 pre_insert_copies ();
2600 if (did_insert)
2602 commit_edge_insertions ();
2603 changed = 1;
2606 free (index_map);
2607 return changed;
2610 /* Top level routine to perform one PRE GCSE pass.
2612 Return nonzero if a change was made. */
2614 static int
2615 one_pre_gcse_pass (void)
2617 int changed = 0;
2619 gcse_subst_count = 0;
2620 gcse_create_count = 0;
2622 /* Return if there's nothing to do, or it is too expensive. */
2623 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2624 || is_too_expensive (_("PRE disabled")))
2625 return 0;
2627 /* We need alias. */
2628 init_alias_analysis ();
2630 bytes_used = 0;
2631 gcc_obstack_init (&gcse_obstack);
2632 alloc_gcse_mem ();
2634 alloc_hash_table (&expr_hash_table);
2635 add_noreturn_fake_exit_edges ();
2636 if (flag_gcse_lm)
2637 compute_ld_motion_mems ();
2639 compute_hash_table (&expr_hash_table);
2640 if (flag_gcse_lm)
2641 trim_ld_motion_mems ();
2642 if (dump_file)
2643 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2645 if (expr_hash_table.n_elems > 0)
2647 struct edge_list *edge_list;
2648 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2649 edge_list = compute_pre_data ();
2650 changed |= pre_gcse (edge_list);
2651 free_edge_list (edge_list);
2652 free_pre_mem ();
2655 if (flag_gcse_lm)
2656 free_ld_motion_mems ();
2657 remove_fake_exit_edges ();
2658 free_hash_table (&expr_hash_table);
2660 free_gcse_mem ();
2661 obstack_free (&gcse_obstack, NULL);
2663 /* We are finished with alias. */
2664 end_alias_analysis ();
2666 if (dump_file)
2668 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2669 current_function_name (), n_basic_blocks, bytes_used);
2670 fprintf (dump_file, "%d substs, %d insns created\n",
2671 gcse_subst_count, gcse_create_count);
2674 return changed;
2677 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2678 to INSN. If such notes are added to an insn which references a
2679 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2680 that note, because the following loop optimization pass requires
2681 them. */
2683 /* ??? If there was a jump optimization pass after gcse and before loop,
2684 then we would not need to do this here, because jump would add the
2685 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2687 static void
2688 add_label_notes (rtx x, rtx insn)
2690 enum rtx_code code = GET_CODE (x);
2691 int i, j;
2692 const char *fmt;
2694 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2696 /* This code used to ignore labels that referred to dispatch tables to
2697 avoid flow generating (slightly) worse code.
2699 We no longer ignore such label references (see LABEL_REF handling in
2700 mark_jump_label for additional information). */
2702 /* There's no reason for current users to emit jump-insns with
2703 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2704 notes. */
2705 gcc_assert (!JUMP_P (insn));
2706 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2708 if (LABEL_P (XEXP (x, 0)))
2709 LABEL_NUSES (XEXP (x, 0))++;
2711 return;
2714 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2716 if (fmt[i] == 'e')
2717 add_label_notes (XEXP (x, i), insn);
2718 else if (fmt[i] == 'E')
2719 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2720 add_label_notes (XVECEXP (x, i, j), insn);
2724 /* Code Hoisting variables and subroutines. */
2726 /* Very busy expressions. */
2727 static sbitmap *hoist_vbein;
2728 static sbitmap *hoist_vbeout;
2730 /* ??? We could compute post dominators and run this algorithm in
2731 reverse to perform tail merging, doing so would probably be
2732 more effective than the tail merging code in jump.c.
2734 It's unclear if tail merging could be run in parallel with
2735 code hoisting. It would be nice. */
2737 /* Allocate vars used for code hoisting analysis. */
2739 static void
2740 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2742 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2743 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2744 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2746 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2747 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2750 /* Free vars used for code hoisting analysis. */
2752 static void
2753 free_code_hoist_mem (void)
2755 sbitmap_vector_free (antloc);
2756 sbitmap_vector_free (transp);
2757 sbitmap_vector_free (comp);
2759 sbitmap_vector_free (hoist_vbein);
2760 sbitmap_vector_free (hoist_vbeout);
2762 free_dominance_info (CDI_DOMINATORS);
2765 /* Compute the very busy expressions at entry/exit from each block.
2767 An expression is very busy if all paths from a given point
2768 compute the expression. */
2770 static void
2771 compute_code_hoist_vbeinout (void)
2773 int changed, passes;
2774 basic_block bb;
2776 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2777 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2779 passes = 0;
2780 changed = 1;
2782 while (changed)
2784 changed = 0;
2786 /* We scan the blocks in the reverse order to speed up
2787 the convergence. */
2788 FOR_EACH_BB_REVERSE (bb)
2790 if (bb->next_bb != EXIT_BLOCK_PTR)
2792 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2793 hoist_vbein, bb);
2795 /* Include expressions in VBEout that are calculated
2796 in BB and available at its end. */
2797 sbitmap_a_or_b (hoist_vbeout[bb->index],
2798 hoist_vbeout[bb->index], comp[bb->index]);
2801 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2802 antloc[bb->index],
2803 hoist_vbeout[bb->index],
2804 transp[bb->index]);
2807 passes++;
2810 if (dump_file)
2812 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2814 FOR_EACH_BB (bb)
2816 fprintf (dump_file, "vbein (%d): ", bb->index);
2817 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2818 fprintf (dump_file, "vbeout(%d): ", bb->index);
2819 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2824 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2826 static void
2827 compute_code_hoist_data (void)
2829 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2830 prune_expressions (false);
2831 compute_code_hoist_vbeinout ();
2832 calculate_dominance_info (CDI_DOMINATORS);
2833 if (dump_file)
2834 fprintf (dump_file, "\n");
2837 /* Determine if the expression identified by EXPR_INDEX would
2838 reach BB unimpared if it was placed at the end of EXPR_BB.
2839 Stop the search if the expression would need to be moved more
2840 than DISTANCE instructions.
2842 It's unclear exactly what Muchnick meant by "unimpared". It seems
2843 to me that the expression must either be computed or transparent in
2844 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2845 would allow the expression to be hoisted out of loops, even if
2846 the expression wasn't a loop invariant.
2848 Contrast this to reachability for PRE where an expression is
2849 considered reachable if *any* path reaches instead of *all*
2850 paths. */
2852 static int
2853 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2854 char *visited, int distance, int *bb_size)
2856 edge pred;
2857 edge_iterator ei;
2858 int visited_allocated_locally = 0;
2860 /* Terminate the search if distance, for which EXPR is allowed to move,
2861 is exhausted. */
2862 if (distance > 0)
2864 distance -= bb_size[bb->index];
2866 if (distance <= 0)
2867 return 0;
2869 else
2870 gcc_assert (distance == 0);
2872 if (visited == NULL)
2874 visited_allocated_locally = 1;
2875 visited = XCNEWVEC (char, last_basic_block);
2878 FOR_EACH_EDGE (pred, ei, bb->preds)
2880 basic_block pred_bb = pred->src;
2882 if (pred->src == ENTRY_BLOCK_PTR)
2883 break;
2884 else if (pred_bb == expr_bb)
2885 continue;
2886 else if (visited[pred_bb->index])
2887 continue;
2889 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2890 break;
2892 /* Not killed. */
2893 else
2895 visited[pred_bb->index] = 1;
2896 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2897 visited, distance, bb_size))
2898 break;
2901 if (visited_allocated_locally)
2902 free (visited);
2904 return (pred == NULL);
2907 /* Find occurrence in BB. */
2909 static struct occr *
2910 find_occr_in_bb (struct occr *occr, basic_block bb)
2912 /* Find the right occurrence of this expression. */
2913 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2914 occr = occr->next;
2916 return occr;
2919 /* Actually perform code hoisting. */
2921 static int
2922 hoist_code (void)
2924 basic_block bb, dominated;
2925 VEC (basic_block, heap) *dom_tree_walk;
2926 unsigned int dom_tree_walk_index;
2927 VEC (basic_block, heap) *domby;
2928 unsigned int i,j;
2929 struct expr **index_map;
2930 struct expr *expr;
2931 int *to_bb_head;
2932 int *bb_size;
2933 int changed = 0;
2935 /* Compute a mapping from expression number (`bitmap_index') to
2936 hash table entry. */
2938 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2939 for (i = 0; i < expr_hash_table.size; i++)
2940 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2941 index_map[expr->bitmap_index] = expr;
2943 /* Calculate sizes of basic blocks and note how far
2944 each instruction is from the start of its block. We then use this
2945 data to restrict distance an expression can travel. */
2947 to_bb_head = XCNEWVEC (int, get_max_uid ());
2948 bb_size = XCNEWVEC (int, last_basic_block);
2950 FOR_EACH_BB (bb)
2952 rtx insn;
2953 int to_head;
2955 to_head = 0;
2956 FOR_BB_INSNS (bb, insn)
2958 /* Don't count debug instructions to avoid them affecting
2959 decision choices. */
2960 if (NONDEBUG_INSN_P (insn))
2961 to_bb_head[INSN_UID (insn)] = to_head++;
2964 bb_size[bb->index] = to_head;
2967 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2968 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2969 == ENTRY_BLOCK_PTR->next_bb));
2971 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2972 ENTRY_BLOCK_PTR->next_bb);
2974 /* Walk over each basic block looking for potentially hoistable
2975 expressions, nothing gets hoisted from the entry block. */
2976 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2978 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2980 if (VEC_length (basic_block, domby) == 0)
2981 continue;
2983 /* Examine each expression that is very busy at the exit of this
2984 block. These are the potentially hoistable expressions. */
2985 for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
2987 if (TEST_BIT (hoist_vbeout[bb->index], i))
2989 /* Current expression. */
2990 struct expr *expr = index_map[i];
2991 /* Number of occurrences of EXPR that can be hoisted to BB. */
2992 int hoistable = 0;
2993 /* Basic blocks that have occurrences reachable from BB. */
2994 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2995 /* Occurrences reachable from BB. */
2996 VEC (occr_t, heap) *occrs_to_hoist = NULL;
2997 /* We want to insert the expression into BB only once, so
2998 note when we've inserted it. */
2999 int insn_inserted_p;
3000 occr_t occr;
3002 bitmap_initialize (from_bbs, 0);
3004 /* If an expression is computed in BB and is available at end of
3005 BB, hoist all occurrences dominated by BB to BB. */
3006 if (TEST_BIT (comp[bb->index], i))
3008 occr = find_occr_in_bb (expr->antic_occr, bb);
3010 if (occr)
3012 /* An occurrence might've been already deleted
3013 while processing a dominator of BB. */
3014 if (!occr->deleted_p)
3016 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3017 hoistable++;
3020 else
3021 hoistable++;
3024 /* We've found a potentially hoistable expression, now
3025 we look at every block BB dominates to see if it
3026 computes the expression. */
3027 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3029 int max_distance;
3031 /* Ignore self dominance. */
3032 if (bb == dominated)
3033 continue;
3034 /* We've found a dominated block, now see if it computes
3035 the busy expression and whether or not moving that
3036 expression to the "beginning" of that block is safe. */
3037 if (!TEST_BIT (antloc[dominated->index], i))
3038 continue;
3040 occr = find_occr_in_bb (expr->antic_occr, dominated);
3041 gcc_assert (occr);
3043 /* An occurrence might've been already deleted
3044 while processing a dominator of BB. */
3045 if (occr->deleted_p)
3046 continue;
3047 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3049 max_distance = expr->max_distance;
3050 if (max_distance > 0)
3051 /* Adjust MAX_DISTANCE to account for the fact that
3052 OCCR won't have to travel all of DOMINATED, but
3053 only part of it. */
3054 max_distance += (bb_size[dominated->index]
3055 - to_bb_head[INSN_UID (occr->insn)]);
3057 /* Note if the expression would reach the dominated block
3058 unimpared if it was placed at the end of BB.
3060 Keep track of how many times this expression is hoistable
3061 from a dominated block into BB. */
3062 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3063 max_distance, bb_size))
3065 hoistable++;
3066 VEC_safe_push (occr_t, heap,
3067 occrs_to_hoist, occr);
3068 bitmap_set_bit (from_bbs, dominated->index);
3072 /* If we found more than one hoistable occurrence of this
3073 expression, then note it in the vector of expressions to
3074 hoist. It makes no sense to hoist things which are computed
3075 in only one BB, and doing so tends to pessimize register
3076 allocation. One could increase this value to try harder
3077 to avoid any possible code expansion due to register
3078 allocation issues; however experiments have shown that
3079 the vast majority of hoistable expressions are only movable
3080 from two successors, so raising this threshold is likely
3081 to nullify any benefit we get from code hoisting. */
3082 if (hoistable > 1 && dbg_cnt (hoist_insn))
3084 /* If (hoistable != VEC_length), then there is
3085 an occurrence of EXPR in BB itself. Don't waste
3086 time looking for LCA in this case. */
3087 if ((unsigned) hoistable
3088 == VEC_length (occr_t, occrs_to_hoist))
3090 basic_block lca;
3092 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3093 from_bbs);
3094 if (lca != bb)
3095 /* Punt, it's better to hoist these occurrences to
3096 LCA. */
3097 VEC_free (occr_t, heap, occrs_to_hoist);
3100 else
3101 /* Punt, no point hoisting a single occurence. */
3102 VEC_free (occr_t, heap, occrs_to_hoist);
3104 insn_inserted_p = 0;
3106 /* Walk through occurrences of I'th expressions we want
3107 to hoist to BB and make the transformations. */
3108 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3110 rtx insn;
3111 rtx set;
3113 gcc_assert (!occr->deleted_p);
3115 insn = occr->insn;
3116 set = single_set (insn);
3117 gcc_assert (set);
3119 /* Create a pseudo-reg to store the result of reaching
3120 expressions into. Get the mode for the new pseudo
3121 from the mode of the original destination pseudo.
3123 It is important to use new pseudos whenever we
3124 emit a set. This will allow reload to use
3125 rematerialization for such registers. */
3126 if (!insn_inserted_p)
3127 expr->reaching_reg
3128 = gen_reg_rtx_and_attrs (SET_DEST (set));
3130 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3131 insn);
3132 delete_insn (insn);
3133 occr->deleted_p = 1;
3134 changed = 1;
3135 gcse_subst_count++;
3137 if (!insn_inserted_p)
3139 insert_insn_end_basic_block (expr, bb);
3140 insn_inserted_p = 1;
3144 VEC_free (occr_t, heap, occrs_to_hoist);
3145 bitmap_clear (from_bbs);
3148 VEC_free (basic_block, heap, domby);
3151 VEC_free (basic_block, heap, dom_tree_walk);
3152 free (bb_size);
3153 free (to_bb_head);
3154 free (index_map);
3156 return changed;
3159 /* Top level routine to perform one code hoisting (aka unification) pass
3161 Return nonzero if a change was made. */
3163 static int
3164 one_code_hoisting_pass (void)
3166 int changed = 0;
3168 gcse_subst_count = 0;
3169 gcse_create_count = 0;
3171 /* Return if there's nothing to do, or it is too expensive. */
3172 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3173 || is_too_expensive (_("GCSE disabled")))
3174 return 0;
3176 doing_code_hoisting_p = true;
3178 /* We need alias. */
3179 init_alias_analysis ();
3181 bytes_used = 0;
3182 gcc_obstack_init (&gcse_obstack);
3183 alloc_gcse_mem ();
3185 alloc_hash_table (&expr_hash_table);
3186 compute_hash_table (&expr_hash_table);
3187 if (dump_file)
3188 dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3190 if (expr_hash_table.n_elems > 0)
3192 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3193 compute_code_hoist_data ();
3194 changed = hoist_code ();
3195 free_code_hoist_mem ();
3198 free_hash_table (&expr_hash_table);
3199 free_gcse_mem ();
3200 obstack_free (&gcse_obstack, NULL);
3202 /* We are finished with alias. */
3203 end_alias_analysis ();
3205 if (dump_file)
3207 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3208 current_function_name (), n_basic_blocks, bytes_used);
3209 fprintf (dump_file, "%d substs, %d insns created\n",
3210 gcse_subst_count, gcse_create_count);
3213 doing_code_hoisting_p = false;
3215 return changed;
3218 /* Here we provide the things required to do store motion towards the exit.
3219 In order for this to be effective, gcse also needed to be taught how to
3220 move a load when it is killed only by a store to itself.
3222 int i;
3223 float a[10];
3225 void foo(float scale)
3227 for (i=0; i<10; i++)
3228 a[i] *= scale;
3231 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3232 the load out since its live around the loop, and stored at the bottom
3233 of the loop.
3235 The 'Load Motion' referred to and implemented in this file is
3236 an enhancement to gcse which when using edge based LCM, recognizes
3237 this situation and allows gcse to move the load out of the loop.
3239 Once gcse has hoisted the load, store motion can then push this
3240 load towards the exit, and we end up with no loads or stores of 'i'
3241 in the loop. */
3243 static hashval_t
3244 pre_ldst_expr_hash (const void *p)
3246 int do_not_record_p = 0;
3247 const struct ls_expr *const x = (const struct ls_expr *) p;
3248 return
3249 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3252 static int
3253 pre_ldst_expr_eq (const void *p1, const void *p2)
3255 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3256 *const ptr2 = (const struct ls_expr *) p2;
3257 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3260 /* This will search the ldst list for a matching expression. If it
3261 doesn't find one, we create one and initialize it. */
3263 static struct ls_expr *
3264 ldst_entry (rtx x)
3266 int do_not_record_p = 0;
3267 struct ls_expr * ptr;
3268 unsigned int hash;
3269 void **slot;
3270 struct ls_expr e;
3272 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3273 NULL, /*have_reg_qty=*/false);
3275 e.pattern = x;
3276 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3277 if (*slot)
3278 return (struct ls_expr *)*slot;
3280 ptr = XNEW (struct ls_expr);
3282 ptr->next = pre_ldst_mems;
3283 ptr->expr = NULL;
3284 ptr->pattern = x;
3285 ptr->pattern_regs = NULL_RTX;
3286 ptr->loads = NULL_RTX;
3287 ptr->stores = NULL_RTX;
3288 ptr->reaching_reg = NULL_RTX;
3289 ptr->invalid = 0;
3290 ptr->index = 0;
3291 ptr->hash_index = hash;
3292 pre_ldst_mems = ptr;
3293 *slot = ptr;
3295 return ptr;
3298 /* Free up an individual ldst entry. */
3300 static void
3301 free_ldst_entry (struct ls_expr * ptr)
3303 free_INSN_LIST_list (& ptr->loads);
3304 free_INSN_LIST_list (& ptr->stores);
3306 free (ptr);
3309 /* Free up all memory associated with the ldst list. */
3311 static void
3312 free_ld_motion_mems (void)
3314 if (pre_ldst_table)
3315 htab_delete (pre_ldst_table);
3316 pre_ldst_table = NULL;
3318 while (pre_ldst_mems)
3320 struct ls_expr * tmp = pre_ldst_mems;
3322 pre_ldst_mems = pre_ldst_mems->next;
3324 free_ldst_entry (tmp);
3327 pre_ldst_mems = NULL;
3330 /* Dump debugging info about the ldst list. */
3332 static void
3333 print_ldst_list (FILE * file)
3335 struct ls_expr * ptr;
3337 fprintf (file, "LDST list: \n");
3339 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3341 fprintf (file, " Pattern (%3d): ", ptr->index);
3343 print_rtl (file, ptr->pattern);
3345 fprintf (file, "\n Loads : ");
3347 if (ptr->loads)
3348 print_rtl (file, ptr->loads);
3349 else
3350 fprintf (file, "(nil)");
3352 fprintf (file, "\n Stores : ");
3354 if (ptr->stores)
3355 print_rtl (file, ptr->stores);
3356 else
3357 fprintf (file, "(nil)");
3359 fprintf (file, "\n\n");
3362 fprintf (file, "\n");
3365 /* Returns 1 if X is in the list of ldst only expressions. */
3367 static struct ls_expr *
3368 find_rtx_in_ldst (rtx x)
3370 struct ls_expr e;
3371 void **slot;
3372 if (!pre_ldst_table)
3373 return NULL;
3374 e.pattern = x;
3375 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3376 if (!slot || ((struct ls_expr *)*slot)->invalid)
3377 return NULL;
3378 return (struct ls_expr *) *slot;
3381 /* Load Motion for loads which only kill themselves. */
3383 /* Return true if x, a MEM, is a simple access with no side effects.
3384 These are the types of loads we consider for the ld_motion list,
3385 otherwise we let the usual aliasing take care of it. */
3387 static int
3388 simple_mem (const_rtx x)
3390 if (MEM_VOLATILE_P (x))
3391 return 0;
3393 if (GET_MODE (x) == BLKmode)
3394 return 0;
3396 /* If we are handling exceptions, we must be careful with memory references
3397 that may trap. If we are not, the behavior is undefined, so we may just
3398 continue. */
3399 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3400 return 0;
3402 if (side_effects_p (x))
3403 return 0;
3405 /* Do not consider function arguments passed on stack. */
3406 if (reg_mentioned_p (stack_pointer_rtx, x))
3407 return 0;
3409 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3410 return 0;
3412 return 1;
3415 /* Make sure there isn't a buried reference in this pattern anywhere.
3416 If there is, invalidate the entry for it since we're not capable
3417 of fixing it up just yet.. We have to be sure we know about ALL
3418 loads since the aliasing code will allow all entries in the
3419 ld_motion list to not-alias itself. If we miss a load, we will get
3420 the wrong value since gcse might common it and we won't know to
3421 fix it up. */
3423 static void
3424 invalidate_any_buried_refs (rtx x)
3426 const char * fmt;
3427 int i, j;
3428 struct ls_expr * ptr;
3430 /* Invalidate it in the list. */
3431 if (MEM_P (x) && simple_mem (x))
3433 ptr = ldst_entry (x);
3434 ptr->invalid = 1;
3437 /* Recursively process the insn. */
3438 fmt = GET_RTX_FORMAT (GET_CODE (x));
3440 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3442 if (fmt[i] == 'e')
3443 invalidate_any_buried_refs (XEXP (x, i));
3444 else if (fmt[i] == 'E')
3445 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3446 invalidate_any_buried_refs (XVECEXP (x, i, j));
3450 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3451 being defined as MEM loads and stores to symbols, with no side effects
3452 and no registers in the expression. For a MEM destination, we also
3453 check that the insn is still valid if we replace the destination with a
3454 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3455 which don't match this criteria, they are invalidated and trimmed out
3456 later. */
3458 static void
3459 compute_ld_motion_mems (void)
3461 struct ls_expr * ptr;
3462 basic_block bb;
3463 rtx insn;
3465 pre_ldst_mems = NULL;
3466 pre_ldst_table
3467 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3469 FOR_EACH_BB (bb)
3471 FOR_BB_INSNS (bb, insn)
3473 if (NONDEBUG_INSN_P (insn))
3475 if (GET_CODE (PATTERN (insn)) == SET)
3477 rtx src = SET_SRC (PATTERN (insn));
3478 rtx dest = SET_DEST (PATTERN (insn));
3480 /* Check for a simple LOAD... */
3481 if (MEM_P (src) && simple_mem (src))
3483 ptr = ldst_entry (src);
3484 if (REG_P (dest))
3485 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3486 else
3487 ptr->invalid = 1;
3489 else
3491 /* Make sure there isn't a buried load somewhere. */
3492 invalidate_any_buried_refs (src);
3495 /* Check for stores. Don't worry about aliased ones, they
3496 will block any movement we might do later. We only care
3497 about this exact pattern since those are the only
3498 circumstance that we will ignore the aliasing info. */
3499 if (MEM_P (dest) && simple_mem (dest))
3501 ptr = ldst_entry (dest);
3503 if (! MEM_P (src)
3504 && GET_CODE (src) != ASM_OPERANDS
3505 /* Check for REG manually since want_to_gcse_p
3506 returns 0 for all REGs. */
3507 && can_assign_to_reg_without_clobbers_p (src))
3508 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3509 else
3510 ptr->invalid = 1;
3513 else
3514 invalidate_any_buried_refs (PATTERN (insn));
3520 /* Remove any references that have been either invalidated or are not in the
3521 expression list for pre gcse. */
3523 static void
3524 trim_ld_motion_mems (void)
3526 struct ls_expr * * last = & pre_ldst_mems;
3527 struct ls_expr * ptr = pre_ldst_mems;
3529 while (ptr != NULL)
3531 struct expr * expr;
3533 /* Delete if entry has been made invalid. */
3534 if (! ptr->invalid)
3536 /* Delete if we cannot find this mem in the expression list. */
3537 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3539 for (expr = expr_hash_table.table[hash];
3540 expr != NULL;
3541 expr = expr->next_same_hash)
3542 if (expr_equiv_p (expr->expr, ptr->pattern))
3543 break;
3545 else
3546 expr = (struct expr *) 0;
3548 if (expr)
3550 /* Set the expression field if we are keeping it. */
3551 ptr->expr = expr;
3552 last = & ptr->next;
3553 ptr = ptr->next;
3555 else
3557 *last = ptr->next;
3558 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3559 free_ldst_entry (ptr);
3560 ptr = * last;
3564 /* Show the world what we've found. */
3565 if (dump_file && pre_ldst_mems != NULL)
3566 print_ldst_list (dump_file);
3569 /* This routine will take an expression which we are replacing with
3570 a reaching register, and update any stores that are needed if
3571 that expression is in the ld_motion list. Stores are updated by
3572 copying their SRC to the reaching register, and then storing
3573 the reaching register into the store location. These keeps the
3574 correct value in the reaching register for the loads. */
3576 static void
3577 update_ld_motion_stores (struct expr * expr)
3579 struct ls_expr * mem_ptr;
3581 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3583 /* We can try to find just the REACHED stores, but is shouldn't
3584 matter to set the reaching reg everywhere... some might be
3585 dead and should be eliminated later. */
3587 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3588 where reg is the reaching reg used in the load. We checked in
3589 compute_ld_motion_mems that we can replace (set mem expr) with
3590 (set reg expr) in that insn. */
3591 rtx list = mem_ptr->stores;
3593 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3595 rtx insn = XEXP (list, 0);
3596 rtx pat = PATTERN (insn);
3597 rtx src = SET_SRC (pat);
3598 rtx reg = expr->reaching_reg;
3599 rtx copy;
3601 /* If we've already copied it, continue. */
3602 if (expr->reaching_reg == src)
3603 continue;
3605 if (dump_file)
3607 fprintf (dump_file, "PRE: store updated with reaching reg ");
3608 print_rtl (dump_file, reg);
3609 fprintf (dump_file, ":\n ");
3610 print_inline_rtx (dump_file, insn, 8);
3611 fprintf (dump_file, "\n");
3614 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3615 emit_insn_before (copy, insn);
3616 SET_SRC (pat) = reg;
3617 df_insn_rescan (insn);
3619 /* un-recognize this pattern since it's probably different now. */
3620 INSN_CODE (insn) = -1;
3621 gcse_create_count++;
3626 /* Return true if the graph is too expensive to optimize. PASS is the
3627 optimization about to be performed. */
3629 static bool
3630 is_too_expensive (const char *pass)
3632 /* Trying to perform global optimizations on flow graphs which have
3633 a high connectivity will take a long time and is unlikely to be
3634 particularly useful.
3636 In normal circumstances a cfg should have about twice as many
3637 edges as blocks. But we do not want to punish small functions
3638 which have a couple switch statements. Rather than simply
3639 threshold the number of blocks, uses something with a more
3640 graceful degradation. */
3641 if (n_edges > 20000 + n_basic_blocks * 4)
3643 warning (OPT_Wdisabled_optimization,
3644 "%s: %d basic blocks and %d edges/basic block",
3645 pass, n_basic_blocks, n_edges / n_basic_blocks);
3647 return true;
3650 /* If allocating memory for the dataflow bitmaps would take up too much
3651 storage it's better just to disable the optimization. */
3652 if ((n_basic_blocks
3653 * SBITMAP_SET_SIZE (max_reg_num ())
3654 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3656 warning (OPT_Wdisabled_optimization,
3657 "%s: %d basic blocks and %d registers",
3658 pass, n_basic_blocks, max_reg_num ());
3660 return true;
3663 return false;
3666 /* All the passes implemented in this file. Each pass has its
3667 own gate and execute function, and at the end of the file a
3668 pass definition for passes.c.
3670 We do not construct an accurate cfg in functions which call
3671 setjmp, so none of these passes runs if the function calls
3672 setjmp.
3673 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3675 static bool
3676 gate_rtl_pre (void)
3678 return optimize > 0 && flag_gcse
3679 && !cfun->calls_setjmp
3680 && optimize_function_for_speed_p (cfun)
3681 && dbg_cnt (pre);
3684 static unsigned int
3685 execute_rtl_pre (void)
3687 int changed;
3688 delete_unreachable_blocks ();
3689 df_analyze ();
3690 changed = one_pre_gcse_pass ();
3691 flag_rerun_cse_after_global_opts |= changed;
3692 if (changed)
3693 cleanup_cfg (0);
3694 return 0;
3697 static bool
3698 gate_rtl_hoist (void)
3700 return optimize > 0 && flag_gcse
3701 && !cfun->calls_setjmp
3702 /* It does not make sense to run code hoisting unless we are optimizing
3703 for code size -- it rarely makes programs faster, and can make then
3704 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3705 && optimize_function_for_size_p (cfun)
3706 && dbg_cnt (hoist);
3709 static unsigned int
3710 execute_rtl_hoist (void)
3712 int changed;
3713 delete_unreachable_blocks ();
3714 df_analyze ();
3715 changed = one_code_hoisting_pass ();
3716 flag_rerun_cse_after_global_opts |= changed;
3717 if (changed)
3718 cleanup_cfg (0);
3719 return 0;
3722 struct rtl_opt_pass pass_rtl_pre =
3725 RTL_PASS,
3726 "rtl pre", /* name */
3727 gate_rtl_pre, /* gate */
3728 execute_rtl_pre, /* execute */
3729 NULL, /* sub */
3730 NULL, /* next */
3731 0, /* static_pass_number */
3732 TV_PRE, /* tv_id */
3733 PROP_cfglayout, /* properties_required */
3734 0, /* properties_provided */
3735 0, /* properties_destroyed */
3736 0, /* todo_flags_start */
3737 TODO_df_finish | TODO_verify_rtl_sharing |
3738 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3742 struct rtl_opt_pass pass_rtl_hoist =
3745 RTL_PASS,
3746 "hoist", /* name */
3747 gate_rtl_hoist, /* gate */
3748 execute_rtl_hoist, /* execute */
3749 NULL, /* sub */
3750 NULL, /* next */
3751 0, /* static_pass_number */
3752 TV_HOIST, /* tv_id */
3753 PROP_cfglayout, /* properties_required */
3754 0, /* properties_provided */
3755 0, /* properties_destroyed */
3756 0, /* todo_flags_start */
3757 TODO_df_finish | TODO_verify_rtl_sharing |
3758 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3762 #include "gt-gcse.h"