* arm.h (TARGET_CPU_CPP_BUILTINS): Remove Maverick support.
[official-gcc.git] / gcc / gcse.c
blob18e963d1ff097268bc124060c83f36f2b41b97f4
1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
28 /* References searched while implementing this.
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
36 communications of the acm, Vol. 22, Num. 2, Feb. 1979
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "function.h"
154 #include "expr.h"
155 #include "except.h"
156 #include "ggc.h"
157 #include "params.h"
158 #include "cselib.h"
159 #include "intl.h"
160 #include "obstack.h"
161 #include "timevar.h"
162 #include "tree-pass.h"
163 #include "hashtab.h"
164 #include "df.h"
165 #include "dbgcnt.h"
166 #include "target.h"
167 #include "gcse.h"
169 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
170 are a superset of those done by classic GCSE.
172 Two passes of copy/constant propagation are done around PRE or hoisting
173 because the first one enables more GCSE and the second one helps to clean
174 up the copies that PRE and HOIST create. This is needed more for PRE than
175 for HOIST because code hoisting will try to use an existing register
176 containing the common subexpression rather than create a new one. This is
177 harder to do for PRE because of the code motion (which HOIST doesn't do).
179 Expressions we are interested in GCSE-ing are of the form
180 (set (pseudo-reg) (expression)).
181 Function want_to_gcse_p says what these are.
183 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
184 This allows PRE to hoist expressions that are expressed in multiple insns,
185 such as complex address calculations (e.g. for PIC code, or loads with a
186 high part and a low part).
188 PRE handles moving invariant expressions out of loops (by treating them as
189 partially redundant).
191 **********************
193 We used to support multiple passes but there are diminishing returns in
194 doing so. The first pass usually makes 90% of the changes that are doable.
195 A second pass can make a few more changes made possible by the first pass.
196 Experiments show any further passes don't make enough changes to justify
197 the expense.
199 A study of spec92 using an unlimited number of passes:
200 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
201 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
202 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
204 It was found doing copy propagation between each pass enables further
205 substitutions.
207 This study was done before expressions in REG_EQUAL notes were added as
208 candidate expressions for optimization, and before the GIMPLE optimizers
209 were added. Probably, multiple passes is even less efficient now than
210 at the time when the study was conducted.
212 PRE is quite expensive in complicated functions because the DFA can take
213 a while to converge. Hence we only perform one pass.
215 **********************
217 The steps for PRE are:
219 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
221 2) Perform the data flow analysis for PRE.
223 3) Delete the redundant instructions
225 4) Insert the required copies [if any] that make the partially
226 redundant instructions fully redundant.
228 5) For other reaching expressions, insert an instruction to copy the value
229 to a newly created pseudo that will reach the redundant instruction.
231 The deletion is done first so that when we do insertions we
232 know which pseudo reg to use.
234 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
235 argue it is not. The number of iterations for the algorithm to converge
236 is typically 2-4 so I don't view it as that expensive (relatively speaking).
238 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
239 we create. To make an expression reach the place where it's redundant,
240 the result of the expression is copied to a new register, and the redundant
241 expression is deleted by replacing it with this new register. Classic GCSE
242 doesn't have this problem as much as it computes the reaching defs of
243 each register in each block and thus can try to use an existing
244 register. */
246 /* GCSE global vars. */
248 struct target_gcse default_target_gcse;
249 #if SWITCHABLE_TARGET
250 struct target_gcse *this_target_gcse = &default_target_gcse;
251 #endif
253 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
254 int flag_rerun_cse_after_global_opts;
256 /* An obstack for our working variables. */
257 static struct obstack gcse_obstack;
259 struct reg_use {rtx reg_rtx; };
261 /* Hash table of expressions. */
263 struct expr
265 /* The expression. */
266 rtx expr;
267 /* Index in the available expression bitmaps. */
268 int bitmap_index;
269 /* Next entry with the same hash. */
270 struct expr *next_same_hash;
271 /* List of anticipatable occurrences in basic blocks in the function.
272 An "anticipatable occurrence" is one that is the first occurrence in the
273 basic block, the operands are not modified in the basic block prior
274 to the occurrence and the output is not used between the start of
275 the block and the occurrence. */
276 struct occr *antic_occr;
277 /* List of available occurrence in basic blocks in the function.
278 An "available occurrence" is one that is the last occurrence in the
279 basic block and the operands are not modified by following statements in
280 the basic block [including this insn]. */
281 struct occr *avail_occr;
282 /* Non-null if the computation is PRE redundant.
283 The value is the newly created pseudo-reg to record a copy of the
284 expression in all the places that reach the redundant copy. */
285 rtx reaching_reg;
286 /* Maximum distance in instructions this expression can travel.
287 We avoid moving simple expressions for more than a few instructions
288 to keep register pressure under control.
289 A value of "0" removes restrictions on how far the expression can
290 travel. */
291 int max_distance;
294 /* Occurrence of an expression.
295 There is one per basic block. If a pattern appears more than once the
296 last appearance is used [or first for anticipatable expressions]. */
298 struct occr
300 /* Next occurrence of this expression. */
301 struct occr *next;
302 /* The insn that computes the expression. */
303 rtx insn;
304 /* Nonzero if this [anticipatable] occurrence has been deleted. */
305 char deleted_p;
306 /* Nonzero if this [available] occurrence has been copied to
307 reaching_reg. */
308 /* ??? This is mutually exclusive with deleted_p, so they could share
309 the same byte. */
310 char copied_p;
313 typedef struct occr *occr_t;
314 DEF_VEC_P (occr_t);
315 DEF_VEC_ALLOC_P (occr_t, heap);
317 /* Expression hash tables.
318 Each hash table is an array of buckets.
319 ??? It is known that if it were an array of entries, structure elements
320 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
321 not clear whether in the final analysis a sufficient amount of memory would
322 be saved as the size of the available expression bitmaps would be larger
323 [one could build a mapping table without holes afterwards though].
324 Someday I'll perform the computation and figure it out. */
326 struct hash_table_d
328 /* The table itself.
329 This is an array of `expr_hash_table_size' elements. */
330 struct expr **table;
332 /* Size of the hash table, in elements. */
333 unsigned int size;
335 /* Number of hash table elements. */
336 unsigned int n_elems;
339 /* Expression hash table. */
340 static struct hash_table_d expr_hash_table;
342 /* This is a list of expressions which are MEMs and will be used by load
343 or store motion.
344 Load motion tracks MEMs which aren't killed by anything except itself,
345 i.e. loads and stores to a single location.
346 We can then allow movement of these MEM refs with a little special
347 allowance. (all stores copy the same value to the reaching reg used
348 for the loads). This means all values used to store into memory must have
349 no side effects so we can re-issue the setter value. */
351 struct ls_expr
353 struct expr * expr; /* Gcse expression reference for LM. */
354 rtx pattern; /* Pattern of this mem. */
355 rtx pattern_regs; /* List of registers mentioned by the mem. */
356 rtx loads; /* INSN list of loads seen. */
357 rtx stores; /* INSN list of stores seen. */
358 struct ls_expr * next; /* Next in the list. */
359 int invalid; /* Invalid for some reason. */
360 int index; /* If it maps to a bitmap index. */
361 unsigned int hash_index; /* Index when in a hash table. */
362 rtx reaching_reg; /* Register to use when re-writing. */
365 /* Head of the list of load/store memory refs. */
366 static struct ls_expr * pre_ldst_mems = NULL;
368 /* Hashtable for the load/store memory refs. */
369 static htab_t pre_ldst_table = NULL;
371 /* Bitmap containing one bit for each register in the program.
372 Used when performing GCSE to track which registers have been set since
373 the start of the basic block. */
374 static regset reg_set_bitmap;
376 /* Array, indexed by basic block number for a list of insns which modify
377 memory within that block. */
378 static VEC (rtx,heap) **modify_mem_list;
379 static bitmap modify_mem_list_set;
381 typedef struct modify_pair_s
383 rtx dest; /* A MEM. */
384 rtx dest_addr; /* The canonical address of `dest'. */
385 } modify_pair;
387 DEF_VEC_O(modify_pair);
388 DEF_VEC_ALLOC_O(modify_pair,heap);
390 /* This array parallels modify_mem_list, except that it stores MEMs
391 being set and their canonicalized memory addresses. */
392 static VEC (modify_pair,heap) **canon_modify_mem_list;
394 /* Bitmap indexed by block numbers to record which blocks contain
395 function calls. */
396 static bitmap blocks_with_calls;
398 /* Various variables for statistics gathering. */
400 /* Memory used in a pass.
401 This isn't intended to be absolutely precise. Its intent is only
402 to keep an eye on memory usage. */
403 static int bytes_used;
405 /* GCSE substitutions made. */
406 static int gcse_subst_count;
407 /* Number of copy instructions created. */
408 static int gcse_create_count;
410 /* Doing code hoisting. */
411 static bool doing_code_hoisting_p = false;
413 /* For available exprs */
414 static sbitmap *ae_kill;
416 static void compute_can_copy (void);
417 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
418 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
419 static void *gcse_alloc (unsigned long);
420 static void alloc_gcse_mem (void);
421 static void free_gcse_mem (void);
422 static void hash_scan_insn (rtx, struct hash_table_d *);
423 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
424 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
425 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
426 static int want_to_gcse_p (rtx, int *);
427 static int oprs_unchanged_p (const_rtx, const_rtx, int);
428 static int oprs_anticipatable_p (const_rtx, const_rtx);
429 static int oprs_available_p (const_rtx, const_rtx);
430 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
431 struct hash_table_d *);
432 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
433 static int expr_equiv_p (const_rtx, const_rtx);
434 static void record_last_reg_set_info (rtx, int);
435 static void record_last_mem_set_info (rtx);
436 static void record_last_set_info (rtx, const_rtx, void *);
437 static void compute_hash_table (struct hash_table_d *);
438 static void alloc_hash_table (struct hash_table_d *);
439 static void free_hash_table (struct hash_table_d *);
440 static void compute_hash_table_work (struct hash_table_d *);
441 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
442 static void compute_transp (const_rtx, int, sbitmap *);
443 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
444 struct hash_table_d *);
445 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
446 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
447 static void canon_list_insert (rtx, const_rtx, void *);
448 static void alloc_pre_mem (int, int);
449 static void free_pre_mem (void);
450 static struct edge_list *compute_pre_data (void);
451 static int pre_expr_reaches_here_p (basic_block, struct expr *,
452 basic_block);
453 static void insert_insn_end_basic_block (struct expr *, basic_block);
454 static void pre_insert_copy_insn (struct expr *, rtx);
455 static void pre_insert_copies (void);
456 static int pre_delete (void);
457 static int pre_gcse (struct edge_list *);
458 static int one_pre_gcse_pass (void);
459 static void add_label_notes (rtx, rtx);
460 static void alloc_code_hoist_mem (int, int);
461 static void free_code_hoist_mem (void);
462 static void compute_code_hoist_vbeinout (void);
463 static void compute_code_hoist_data (void);
464 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
465 int, int *);
466 static int hoist_code (void);
467 static int one_code_hoisting_pass (void);
468 static rtx process_insert_insn (struct expr *);
469 static int pre_edge_insert (struct edge_list *, struct expr **);
470 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
471 basic_block, char *);
472 static struct ls_expr * ldst_entry (rtx);
473 static void free_ldst_entry (struct ls_expr *);
474 static void free_ld_motion_mems (void);
475 static void print_ldst_list (FILE *);
476 static struct ls_expr * find_rtx_in_ldst (rtx);
477 static int simple_mem (const_rtx);
478 static void invalidate_any_buried_refs (rtx);
479 static void compute_ld_motion_mems (void);
480 static void trim_ld_motion_mems (void);
481 static void update_ld_motion_stores (struct expr *);
482 static void clear_modify_mem_tables (void);
483 static void free_modify_mem_tables (void);
484 static rtx gcse_emit_move_after (rtx, rtx, rtx);
485 static bool is_too_expensive (const char *);
487 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
488 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
490 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
491 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
493 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
494 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
496 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
497 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
499 /* Misc. utilities. */
501 #define can_copy \
502 (this_target_gcse->x_can_copy)
503 #define can_copy_init_p \
504 (this_target_gcse->x_can_copy_init_p)
506 /* Compute which modes support reg/reg copy operations. */
508 static void
509 compute_can_copy (void)
511 int i;
512 #ifndef AVOID_CCMODE_COPIES
513 rtx reg, insn;
514 #endif
515 memset (can_copy, 0, NUM_MACHINE_MODES);
517 start_sequence ();
518 for (i = 0; i < NUM_MACHINE_MODES; i++)
519 if (GET_MODE_CLASS (i) == MODE_CC)
521 #ifdef AVOID_CCMODE_COPIES
522 can_copy[i] = 0;
523 #else
524 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
525 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
526 if (recog (PATTERN (insn), insn, NULL) >= 0)
527 can_copy[i] = 1;
528 #endif
530 else
531 can_copy[i] = 1;
533 end_sequence ();
536 /* Returns whether the mode supports reg/reg copy operations. */
538 bool
539 can_copy_p (enum machine_mode mode)
541 if (! can_copy_init_p)
543 compute_can_copy ();
544 can_copy_init_p = true;
547 return can_copy[mode] != 0;
550 /* Cover function to xmalloc to record bytes allocated. */
552 static void *
553 gmalloc (size_t size)
555 bytes_used += size;
556 return xmalloc (size);
559 /* Cover function to xcalloc to record bytes allocated. */
561 static void *
562 gcalloc (size_t nelem, size_t elsize)
564 bytes_used += nelem * elsize;
565 return xcalloc (nelem, elsize);
568 /* Cover function to obstack_alloc. */
570 static void *
571 gcse_alloc (unsigned long size)
573 bytes_used += size;
574 return obstack_alloc (&gcse_obstack, size);
577 /* Allocate memory for the reg/memory set tracking tables.
578 This is called at the start of each pass. */
580 static void
581 alloc_gcse_mem (void)
583 /* Allocate vars to track sets of regs. */
584 reg_set_bitmap = ALLOC_REG_SET (NULL);
586 /* Allocate array to keep a list of insns which modify memory in each
587 basic block. */
588 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
589 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
590 last_basic_block);
591 modify_mem_list_set = BITMAP_ALLOC (NULL);
592 blocks_with_calls = BITMAP_ALLOC (NULL);
595 /* Free memory allocated by alloc_gcse_mem. */
597 static void
598 free_gcse_mem (void)
600 FREE_REG_SET (reg_set_bitmap);
602 free_modify_mem_tables ();
603 BITMAP_FREE (modify_mem_list_set);
604 BITMAP_FREE (blocks_with_calls);
607 /* Compute the local properties of each recorded expression.
609 Local properties are those that are defined by the block, irrespective of
610 other blocks.
612 An expression is transparent in a block if its operands are not modified
613 in the block.
615 An expression is computed (locally available) in a block if it is computed
616 at least once and expression would contain the same value if the
617 computation was moved to the end of the block.
619 An expression is locally anticipatable in a block if it is computed at
620 least once and expression would contain the same value if the computation
621 was moved to the beginning of the block.
623 We call this routine for pre and code hoisting. They all compute
624 basically the same information and thus can easily share this code.
626 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
627 properties. If NULL, then it is not necessary to compute or record that
628 particular property.
630 TABLE controls which hash table to look at. */
632 static void
633 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
634 struct hash_table_d *table)
636 unsigned int i;
638 /* Initialize any bitmaps that were passed in. */
639 if (transp)
641 sbitmap_vector_ones (transp, last_basic_block);
644 if (comp)
645 sbitmap_vector_zero (comp, last_basic_block);
646 if (antloc)
647 sbitmap_vector_zero (antloc, last_basic_block);
649 for (i = 0; i < table->size; i++)
651 struct expr *expr;
653 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
655 int indx = expr->bitmap_index;
656 struct occr *occr;
658 /* The expression is transparent in this block if it is not killed.
659 We start by assuming all are transparent [none are killed], and
660 then reset the bits for those that are. */
661 if (transp)
662 compute_transp (expr->expr, indx, transp);
664 /* The occurrences recorded in antic_occr are exactly those that
665 we want to set to nonzero in ANTLOC. */
666 if (antloc)
667 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
669 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
671 /* While we're scanning the table, this is a good place to
672 initialize this. */
673 occr->deleted_p = 0;
676 /* The occurrences recorded in avail_occr are exactly those that
677 we want to set to nonzero in COMP. */
678 if (comp)
679 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
681 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
683 /* While we're scanning the table, this is a good place to
684 initialize this. */
685 occr->copied_p = 0;
688 /* While we're scanning the table, this is a good place to
689 initialize this. */
690 expr->reaching_reg = 0;
695 /* Hash table support. */
697 struct reg_avail_info
699 basic_block last_bb;
700 int first_set;
701 int last_set;
704 static struct reg_avail_info *reg_avail_info;
705 static basic_block current_bb;
707 /* See whether X, the source of a set, is something we want to consider for
708 GCSE. */
710 static int
711 want_to_gcse_p (rtx x, int *max_distance_ptr)
713 #ifdef STACK_REGS
714 /* On register stack architectures, don't GCSE constants from the
715 constant pool, as the benefits are often swamped by the overhead
716 of shuffling the register stack between basic blocks. */
717 if (IS_STACK_MODE (GET_MODE (x)))
718 x = avoid_constant_pool_reference (x);
719 #endif
721 /* GCSE'ing constants:
723 We do not specifically distinguish between constant and non-constant
724 expressions in PRE and Hoist. We use set_src_cost below to limit
725 the maximum distance simple expressions can travel.
727 Nevertheless, constants are much easier to GCSE, and, hence,
728 it is easy to overdo the optimizations. Usually, excessive PRE and
729 Hoisting of constant leads to increased register pressure.
731 RA can deal with this by rematerialing some of the constants.
732 Therefore, it is important that the back-end generates sets of constants
733 in a way that allows reload rematerialize them under high register
734 pressure, i.e., a pseudo register with REG_EQUAL to constant
735 is set only once. Failing to do so will result in IRA/reload
736 spilling such constants under high register pressure instead of
737 rematerializing them. */
739 switch (GET_CODE (x))
741 case REG:
742 case SUBREG:
743 case CALL:
744 return 0;
746 case CONST_INT:
747 case CONST_DOUBLE:
748 case CONST_FIXED:
749 case CONST_VECTOR:
750 if (!doing_code_hoisting_p)
751 /* Do not PRE constants. */
752 return 0;
754 /* FALLTHRU */
756 default:
757 if (doing_code_hoisting_p)
758 /* PRE doesn't implement max_distance restriction. */
760 int cost;
761 int max_distance;
763 gcc_assert (!optimize_function_for_speed_p (cfun)
764 && optimize_function_for_size_p (cfun));
765 cost = set_src_cost (x, 0);
767 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
769 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
770 if (max_distance == 0)
771 return 0;
773 gcc_assert (max_distance > 0);
775 else
776 max_distance = 0;
778 if (max_distance_ptr)
779 *max_distance_ptr = max_distance;
782 return can_assign_to_reg_without_clobbers_p (x);
786 /* Used internally by can_assign_to_reg_without_clobbers_p. */
788 static GTY(()) rtx test_insn;
790 /* Return true if we can assign X to a pseudo register such that the
791 resulting insn does not result in clobbering a hard register as a
792 side-effect.
794 Additionally, if the target requires it, check that the resulting insn
795 can be copied. If it cannot, this means that X is special and probably
796 has hidden side-effects we don't want to mess with.
798 This function is typically used by code motion passes, to verify
799 that it is safe to insert an insn without worrying about clobbering
800 maybe live hard regs. */
802 bool
803 can_assign_to_reg_without_clobbers_p (rtx x)
805 int num_clobbers = 0;
806 int icode;
808 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
809 if (general_operand (x, GET_MODE (x)))
810 return 1;
811 else if (GET_MODE (x) == VOIDmode)
812 return 0;
814 /* Otherwise, check if we can make a valid insn from it. First initialize
815 our test insn if we haven't already. */
816 if (test_insn == 0)
818 test_insn
819 = make_insn_raw (gen_rtx_SET (VOIDmode,
820 gen_rtx_REG (word_mode,
821 FIRST_PSEUDO_REGISTER * 2),
822 const0_rtx));
823 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
826 /* Now make an insn like the one we would make when GCSE'ing and see if
827 valid. */
828 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
829 SET_SRC (PATTERN (test_insn)) = x;
831 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
832 if (icode < 0)
833 return false;
835 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
836 return false;
838 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
839 return false;
841 return true;
844 /* Return nonzero if the operands of expression X are unchanged from the
845 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
846 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
848 static int
849 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
851 int i, j;
852 enum rtx_code code;
853 const char *fmt;
855 if (x == 0)
856 return 1;
858 code = GET_CODE (x);
859 switch (code)
861 case REG:
863 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
865 if (info->last_bb != current_bb)
866 return 1;
867 if (avail_p)
868 return info->last_set < DF_INSN_LUID (insn);
869 else
870 return info->first_set >= DF_INSN_LUID (insn);
873 case MEM:
874 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
875 x, avail_p))
876 return 0;
877 else
878 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
880 case PRE_DEC:
881 case PRE_INC:
882 case POST_DEC:
883 case POST_INC:
884 case PRE_MODIFY:
885 case POST_MODIFY:
886 return 0;
888 case PC:
889 case CC0: /*FIXME*/
890 case CONST:
891 case CONST_INT:
892 case CONST_DOUBLE:
893 case CONST_FIXED:
894 case CONST_VECTOR:
895 case SYMBOL_REF:
896 case LABEL_REF:
897 case ADDR_VEC:
898 case ADDR_DIFF_VEC:
899 return 1;
901 default:
902 break;
905 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
907 if (fmt[i] == 'e')
909 /* If we are about to do the last recursive call needed at this
910 level, change it into iteration. This function is called enough
911 to be worth it. */
912 if (i == 0)
913 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
915 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
916 return 0;
918 else if (fmt[i] == 'E')
919 for (j = 0; j < XVECLEN (x, i); j++)
920 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
921 return 0;
924 return 1;
927 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
929 struct mem_conflict_info
931 /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
932 see if a memory store conflicts with this memory load. */
933 const_rtx mem;
935 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
936 references. */
937 bool conflict;
940 /* DEST is the output of an instruction. If it is a memory reference and
941 possibly conflicts with the load found in DATA, then communicate this
942 information back through DATA. */
944 static void
945 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
946 void *data)
948 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
950 while (GET_CODE (dest) == SUBREG
951 || GET_CODE (dest) == ZERO_EXTRACT
952 || GET_CODE (dest) == STRICT_LOW_PART)
953 dest = XEXP (dest, 0);
955 /* If DEST is not a MEM, then it will not conflict with the load. Note
956 that function calls are assumed to clobber memory, but are handled
957 elsewhere. */
958 if (! MEM_P (dest))
959 return;
961 /* If we are setting a MEM in our list of specially recognized MEMs,
962 don't mark as killed this time. */
963 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
965 if (!find_rtx_in_ldst (dest))
966 mci->conflict = true;
967 return;
970 if (true_dependence (dest, GET_MODE (dest), mci->mem))
971 mci->conflict = true;
974 /* Return nonzero if the expression in X (a memory reference) is killed
975 in block BB before or after the insn with the LUID in UID_LIMIT.
976 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
977 before UID_LIMIT.
979 To check the entire block, set UID_LIMIT to max_uid + 1 and
980 AVAIL_P to 0. */
982 static int
983 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
984 int avail_p)
986 VEC (rtx,heap) *list = modify_mem_list[bb->index];
987 rtx setter;
988 unsigned ix;
990 /* If this is a readonly then we aren't going to be changing it. */
991 if (MEM_READONLY_P (x))
992 return 0;
994 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
996 struct mem_conflict_info mci;
998 /* Ignore entries in the list that do not apply. */
999 if ((avail_p
1000 && DF_INSN_LUID (setter) < uid_limit)
1001 || (! avail_p
1002 && DF_INSN_LUID (setter) > uid_limit))
1003 continue;
1005 /* If SETTER is a call everything is clobbered. Note that calls
1006 to pure functions are never put on the list, so we need not
1007 worry about them. */
1008 if (CALL_P (setter))
1009 return 1;
1011 /* SETTER must be an INSN of some kind that sets memory. Call
1012 note_stores to examine each hunk of memory that is modified. */
1013 mci.mem = x;
1014 mci.conflict = false;
1015 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1016 if (mci.conflict)
1017 return 1;
1019 return 0;
1022 /* Return nonzero if the operands of expression X are unchanged from
1023 the start of INSN's basic block up to but not including INSN. */
1025 static int
1026 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1028 return oprs_unchanged_p (x, insn, 0);
1031 /* Return nonzero if the operands of expression X are unchanged from
1032 INSN to the end of INSN's basic block. */
1034 static int
1035 oprs_available_p (const_rtx x, const_rtx insn)
1037 return oprs_unchanged_p (x, insn, 1);
1040 /* Hash expression X.
1042 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1043 indicating if a volatile operand is found or if the expression contains
1044 something we don't want to insert in the table. HASH_TABLE_SIZE is
1045 the current size of the hash table to be probed. */
1047 static unsigned int
1048 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1049 int hash_table_size)
1051 unsigned int hash;
1053 *do_not_record_p = 0;
1055 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1056 return hash % hash_table_size;
1059 /* Return nonzero if exp1 is equivalent to exp2. */
1061 static int
1062 expr_equiv_p (const_rtx x, const_rtx y)
1064 return exp_equiv_p (x, y, 0, true);
1067 /* Insert expression X in INSN in the hash TABLE.
1068 If it is already present, record it as the last occurrence in INSN's
1069 basic block.
1071 MODE is the mode of the value X is being stored into.
1072 It is only used if X is a CONST_INT.
1074 ANTIC_P is nonzero if X is an anticipatable expression.
1075 AVAIL_P is nonzero if X is an available expression.
1077 MAX_DISTANCE is the maximum distance in instructions this expression can
1078 be moved. */
1080 static void
1081 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1082 int avail_p, int max_distance, struct hash_table_d *table)
1084 int found, do_not_record_p;
1085 unsigned int hash;
1086 struct expr *cur_expr, *last_expr = NULL;
1087 struct occr *antic_occr, *avail_occr;
1089 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1091 /* Do not insert expression in table if it contains volatile operands,
1092 or if hash_expr determines the expression is something we don't want
1093 to or can't handle. */
1094 if (do_not_record_p)
1095 return;
1097 cur_expr = table->table[hash];
1098 found = 0;
1100 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1102 /* If the expression isn't found, save a pointer to the end of
1103 the list. */
1104 last_expr = cur_expr;
1105 cur_expr = cur_expr->next_same_hash;
1108 if (! found)
1110 cur_expr = GOBNEW (struct expr);
1111 bytes_used += sizeof (struct expr);
1112 if (table->table[hash] == NULL)
1113 /* This is the first pattern that hashed to this index. */
1114 table->table[hash] = cur_expr;
1115 else
1116 /* Add EXPR to end of this hash chain. */
1117 last_expr->next_same_hash = cur_expr;
1119 /* Set the fields of the expr element. */
1120 cur_expr->expr = x;
1121 cur_expr->bitmap_index = table->n_elems++;
1122 cur_expr->next_same_hash = NULL;
1123 cur_expr->antic_occr = NULL;
1124 cur_expr->avail_occr = NULL;
1125 gcc_assert (max_distance >= 0);
1126 cur_expr->max_distance = max_distance;
1128 else
1129 gcc_assert (cur_expr->max_distance == max_distance);
1131 /* Now record the occurrence(s). */
1132 if (antic_p)
1134 antic_occr = cur_expr->antic_occr;
1136 if (antic_occr
1137 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1138 antic_occr = NULL;
1140 if (antic_occr)
1141 /* Found another instance of the expression in the same basic block.
1142 Prefer the currently recorded one. We want the first one in the
1143 block and the block is scanned from start to end. */
1144 ; /* nothing to do */
1145 else
1147 /* First occurrence of this expression in this basic block. */
1148 antic_occr = GOBNEW (struct occr);
1149 bytes_used += sizeof (struct occr);
1150 antic_occr->insn = insn;
1151 antic_occr->next = cur_expr->antic_occr;
1152 antic_occr->deleted_p = 0;
1153 cur_expr->antic_occr = antic_occr;
1157 if (avail_p)
1159 avail_occr = cur_expr->avail_occr;
1161 if (avail_occr
1162 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1164 /* Found another instance of the expression in the same basic block.
1165 Prefer this occurrence to the currently recorded one. We want
1166 the last one in the block and the block is scanned from start
1167 to end. */
1168 avail_occr->insn = insn;
1170 else
1172 /* First occurrence of this expression in this basic block. */
1173 avail_occr = GOBNEW (struct occr);
1174 bytes_used += sizeof (struct occr);
1175 avail_occr->insn = insn;
1176 avail_occr->next = cur_expr->avail_occr;
1177 avail_occr->deleted_p = 0;
1178 cur_expr->avail_occr = avail_occr;
1183 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1185 static void
1186 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1188 rtx src = SET_SRC (set);
1189 rtx dest = SET_DEST (set);
1190 rtx note;
1192 if (GET_CODE (src) == CALL)
1193 hash_scan_call (src, insn, table);
1195 else if (REG_P (dest))
1197 unsigned int regno = REGNO (dest);
1198 int max_distance = 0;
1200 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1202 This allows us to do a single GCSE pass and still eliminate
1203 redundant constants, addresses or other expressions that are
1204 constructed with multiple instructions.
1206 However, keep the original SRC if INSN is a simple reg-reg move.
1207 In this case, there will almost always be a REG_EQUAL note on the
1208 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1209 for INSN, we miss copy propagation opportunities and we perform the
1210 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1211 do more than one PRE GCSE pass.
1213 Note that this does not impede profitable constant propagations. We
1214 "look through" reg-reg sets in lookup_avail_set. */
1215 note = find_reg_equal_equiv_note (insn);
1216 if (note != 0
1217 && REG_NOTE_KIND (note) == REG_EQUAL
1218 && !REG_P (src)
1219 && want_to_gcse_p (XEXP (note, 0), NULL))
1220 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1222 /* Only record sets of pseudo-regs in the hash table. */
1223 if (regno >= FIRST_PSEUDO_REGISTER
1224 /* Don't GCSE something if we can't do a reg/reg copy. */
1225 && can_copy_p (GET_MODE (dest))
1226 /* GCSE commonly inserts instruction after the insn. We can't
1227 do that easily for EH edges so disable GCSE on these for now. */
1228 /* ??? We can now easily create new EH landing pads at the
1229 gimple level, for splitting edges; there's no reason we
1230 can't do the same thing at the rtl level. */
1231 && !can_throw_internal (insn)
1232 /* Is SET_SRC something we want to gcse? */
1233 && want_to_gcse_p (src, &max_distance)
1234 /* Don't CSE a nop. */
1235 && ! set_noop_p (set)
1236 /* Don't GCSE if it has attached REG_EQUIV note.
1237 At this point this only function parameters should have
1238 REG_EQUIV notes and if the argument slot is used somewhere
1239 explicitly, it means address of parameter has been taken,
1240 so we should not extend the lifetime of the pseudo. */
1241 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1243 /* An expression is not anticipatable if its operands are
1244 modified before this insn or if this is not the only SET in
1245 this insn. The latter condition does not have to mean that
1246 SRC itself is not anticipatable, but we just will not be
1247 able to handle code motion of insns with multiple sets. */
1248 int antic_p = oprs_anticipatable_p (src, insn)
1249 && !multiple_sets (insn);
1250 /* An expression is not available if its operands are
1251 subsequently modified, including this insn. It's also not
1252 available if this is a branch, because we can't insert
1253 a set after the branch. */
1254 int avail_p = (oprs_available_p (src, insn)
1255 && ! JUMP_P (insn));
1257 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1258 max_distance, table);
1261 /* In case of store we want to consider the memory value as available in
1262 the REG stored in that memory. This makes it possible to remove
1263 redundant loads from due to stores to the same location. */
1264 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1266 unsigned int regno = REGNO (src);
1267 int max_distance = 0;
1269 /* Only record sets of pseudo-regs in the hash table. */
1270 if (regno >= FIRST_PSEUDO_REGISTER
1271 /* Don't GCSE something if we can't do a reg/reg copy. */
1272 && can_copy_p (GET_MODE (src))
1273 /* GCSE commonly inserts instruction after the insn. We can't
1274 do that easily for EH edges so disable GCSE on these for now. */
1275 && !can_throw_internal (insn)
1276 /* Is SET_DEST something we want to gcse? */
1277 && want_to_gcse_p (dest, &max_distance)
1278 /* Don't CSE a nop. */
1279 && ! set_noop_p (set)
1280 /* Don't GCSE if it has attached REG_EQUIV note.
1281 At this point this only function parameters should have
1282 REG_EQUIV notes and if the argument slot is used somewhere
1283 explicitly, it means address of parameter has been taken,
1284 so we should not extend the lifetime of the pseudo. */
1285 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1286 || ! MEM_P (XEXP (note, 0))))
1288 /* Stores are never anticipatable. */
1289 int antic_p = 0;
1290 /* An expression is not available if its operands are
1291 subsequently modified, including this insn. It's also not
1292 available if this is a branch, because we can't insert
1293 a set after the branch. */
1294 int avail_p = oprs_available_p (dest, insn)
1295 && ! JUMP_P (insn);
1297 /* Record the memory expression (DEST) in the hash table. */
1298 insert_expr_in_table (dest, GET_MODE (dest), insn,
1299 antic_p, avail_p, max_distance, table);
1304 static void
1305 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1306 struct hash_table_d *table ATTRIBUTE_UNUSED)
1308 /* Currently nothing to do. */
1311 static void
1312 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1313 struct hash_table_d *table ATTRIBUTE_UNUSED)
1315 /* Currently nothing to do. */
1318 /* Process INSN and add hash table entries as appropriate. */
1320 static void
1321 hash_scan_insn (rtx insn, struct hash_table_d *table)
1323 rtx pat = PATTERN (insn);
1324 int i;
1326 /* Pick out the sets of INSN and for other forms of instructions record
1327 what's been modified. */
1329 if (GET_CODE (pat) == SET)
1330 hash_scan_set (pat, insn, table);
1332 else if (GET_CODE (pat) == CLOBBER)
1333 hash_scan_clobber (pat, insn, table);
1335 else if (GET_CODE (pat) == CALL)
1336 hash_scan_call (pat, insn, table);
1338 else if (GET_CODE (pat) == PARALLEL)
1339 for (i = 0; i < XVECLEN (pat, 0); i++)
1341 rtx x = XVECEXP (pat, 0, i);
1343 if (GET_CODE (x) == SET)
1344 hash_scan_set (x, insn, table);
1345 else if (GET_CODE (x) == CLOBBER)
1346 hash_scan_clobber (x, insn, table);
1347 else if (GET_CODE (x) == CALL)
1348 hash_scan_call (x, insn, table);
1352 /* Dump the hash table TABLE to file FILE under the name NAME. */
1354 static void
1355 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1357 int i;
1358 /* Flattened out table, so it's printed in proper order. */
1359 struct expr **flat_table;
1360 unsigned int *hash_val;
1361 struct expr *expr;
1363 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1364 hash_val = XNEWVEC (unsigned int, table->n_elems);
1366 for (i = 0; i < (int) table->size; i++)
1367 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1369 flat_table[expr->bitmap_index] = expr;
1370 hash_val[expr->bitmap_index] = i;
1373 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1374 name, table->size, table->n_elems);
1376 for (i = 0; i < (int) table->n_elems; i++)
1377 if (flat_table[i] != 0)
1379 expr = flat_table[i];
1380 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1381 expr->bitmap_index, hash_val[i], expr->max_distance);
1382 print_rtl (file, expr->expr);
1383 fprintf (file, "\n");
1386 fprintf (file, "\n");
1388 free (flat_table);
1389 free (hash_val);
1392 /* Record register first/last/block set information for REGNO in INSN.
1394 first_set records the first place in the block where the register
1395 is set and is used to compute "anticipatability".
1397 last_set records the last place in the block where the register
1398 is set and is used to compute "availability".
1400 last_bb records the block for which first_set and last_set are
1401 valid, as a quick test to invalidate them. */
1403 static void
1404 record_last_reg_set_info (rtx insn, int regno)
1406 struct reg_avail_info *info = &reg_avail_info[regno];
1407 int luid = DF_INSN_LUID (insn);
1409 info->last_set = luid;
1410 if (info->last_bb != current_bb)
1412 info->last_bb = current_bb;
1413 info->first_set = luid;
1417 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1418 Note we store a pair of elements in the list, so they have to be
1419 taken off pairwise. */
1421 static void
1422 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1423 void * v_insn)
1425 rtx dest_addr, insn;
1426 int bb;
1427 modify_pair *pair;
1429 while (GET_CODE (dest) == SUBREG
1430 || GET_CODE (dest) == ZERO_EXTRACT
1431 || GET_CODE (dest) == STRICT_LOW_PART)
1432 dest = XEXP (dest, 0);
1434 /* If DEST is not a MEM, then it will not conflict with a load. Note
1435 that function calls are assumed to clobber memory, but are handled
1436 elsewhere. */
1438 if (! MEM_P (dest))
1439 return;
1441 dest_addr = get_addr (XEXP (dest, 0));
1442 dest_addr = canon_rtx (dest_addr);
1443 insn = (rtx) v_insn;
1444 bb = BLOCK_FOR_INSN (insn)->index;
1446 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1447 pair->dest = dest;
1448 pair->dest_addr = dest_addr;
1451 /* Record memory modification information for INSN. We do not actually care
1452 about the memory location(s) that are set, or even how they are set (consider
1453 a CALL_INSN). We merely need to record which insns modify memory. */
1455 static void
1456 record_last_mem_set_info (rtx insn)
1458 int bb = BLOCK_FOR_INSN (insn)->index;
1460 /* load_killed_in_block_p will handle the case of calls clobbering
1461 everything. */
1462 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1463 bitmap_set_bit (modify_mem_list_set, bb);
1465 if (CALL_P (insn))
1466 bitmap_set_bit (blocks_with_calls, bb);
1467 else
1468 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1471 /* Called from compute_hash_table via note_stores to handle one
1472 SET or CLOBBER in an insn. DATA is really the instruction in which
1473 the SET is taking place. */
1475 static void
1476 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1478 rtx last_set_insn = (rtx) data;
1480 if (GET_CODE (dest) == SUBREG)
1481 dest = SUBREG_REG (dest);
1483 if (REG_P (dest))
1484 record_last_reg_set_info (last_set_insn, REGNO (dest));
1485 else if (MEM_P (dest)
1486 /* Ignore pushes, they clobber nothing. */
1487 && ! push_operand (dest, GET_MODE (dest)))
1488 record_last_mem_set_info (last_set_insn);
1491 /* Top level function to create an expression hash table.
1493 Expression entries are placed in the hash table if
1494 - they are of the form (set (pseudo-reg) src),
1495 - src is something we want to perform GCSE on,
1496 - none of the operands are subsequently modified in the block
1498 Currently src must be a pseudo-reg or a const_int.
1500 TABLE is the table computed. */
1502 static void
1503 compute_hash_table_work (struct hash_table_d *table)
1505 int i;
1507 /* re-Cache any INSN_LIST nodes we have allocated. */
1508 clear_modify_mem_tables ();
1509 /* Some working arrays used to track first and last set in each block. */
1510 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1512 for (i = 0; i < max_reg_num (); ++i)
1513 reg_avail_info[i].last_bb = NULL;
1515 FOR_EACH_BB (current_bb)
1517 rtx insn;
1518 unsigned int regno;
1520 /* First pass over the instructions records information used to
1521 determine when registers and memory are first and last set. */
1522 FOR_BB_INSNS (current_bb, insn)
1524 if (!NONDEBUG_INSN_P (insn))
1525 continue;
1527 if (CALL_P (insn))
1529 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1530 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1531 record_last_reg_set_info (insn, regno);
1533 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1534 record_last_mem_set_info (insn);
1537 note_stores (PATTERN (insn), record_last_set_info, insn);
1540 /* The next pass builds the hash table. */
1541 FOR_BB_INSNS (current_bb, insn)
1542 if (NONDEBUG_INSN_P (insn))
1543 hash_scan_insn (insn, table);
1546 free (reg_avail_info);
1547 reg_avail_info = NULL;
1550 /* Allocate space for the set/expr hash TABLE.
1551 It is used to determine the number of buckets to use. */
1553 static void
1554 alloc_hash_table (struct hash_table_d *table)
1556 int n;
1558 n = get_max_insn_count ();
1560 table->size = n / 4;
1561 if (table->size < 11)
1562 table->size = 11;
1564 /* Attempt to maintain efficient use of hash table.
1565 Making it an odd number is simplest for now.
1566 ??? Later take some measurements. */
1567 table->size |= 1;
1568 n = table->size * sizeof (struct expr *);
1569 table->table = GNEWVAR (struct expr *, n);
1572 /* Free things allocated by alloc_hash_table. */
1574 static void
1575 free_hash_table (struct hash_table_d *table)
1577 free (table->table);
1580 /* Compute the expression hash table TABLE. */
1582 static void
1583 compute_hash_table (struct hash_table_d *table)
1585 /* Initialize count of number of entries in hash table. */
1586 table->n_elems = 0;
1587 memset (table->table, 0, table->size * sizeof (struct expr *));
1589 compute_hash_table_work (table);
1592 /* Expression tracking support. */
1594 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1595 static void
1596 clear_modify_mem_tables (void)
1598 unsigned i;
1599 bitmap_iterator bi;
1601 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1603 VEC_free (rtx, heap, modify_mem_list[i]);
1604 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1606 bitmap_clear (modify_mem_list_set);
1607 bitmap_clear (blocks_with_calls);
1610 /* Release memory used by modify_mem_list_set. */
1612 static void
1613 free_modify_mem_tables (void)
1615 clear_modify_mem_tables ();
1616 free (modify_mem_list);
1617 free (canon_modify_mem_list);
1618 modify_mem_list = 0;
1619 canon_modify_mem_list = 0;
1622 /* For each block, compute whether X is transparent. X is either an
1623 expression or an assignment [though we don't care which, for this context
1624 an assignment is treated as an expression]. For each block where an
1625 element of X is modified, reset the INDX bit in BMAP. */
1627 static void
1628 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1630 int i, j;
1631 enum rtx_code code;
1632 const char *fmt;
1634 /* repeat is used to turn tail-recursion into iteration since GCC
1635 can't do it when there's no return value. */
1636 repeat:
1638 if (x == 0)
1639 return;
1641 code = GET_CODE (x);
1642 switch (code)
1644 case REG:
1646 df_ref def;
1647 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1648 def;
1649 def = DF_REF_NEXT_REG (def))
1650 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1653 return;
1655 case MEM:
1656 if (! MEM_READONLY_P (x))
1658 bitmap_iterator bi;
1659 unsigned bb_index;
1661 /* First handle all the blocks with calls. We don't need to
1662 do any list walking for them. */
1663 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1665 RESET_BIT (bmap[bb_index], indx);
1668 /* Now iterate over the blocks which have memory modifications
1669 but which do not have any calls. */
1670 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1671 blocks_with_calls,
1672 0, bb_index, bi)
1674 VEC (modify_pair,heap) *list
1675 = canon_modify_mem_list[bb_index];
1676 modify_pair *pair;
1677 unsigned ix;
1679 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1681 rtx dest = pair->dest;
1682 rtx dest_addr = pair->dest_addr;
1684 if (canon_true_dependence (dest, GET_MODE (dest),
1685 dest_addr, x, NULL_RTX))
1686 RESET_BIT (bmap[bb_index], indx);
1691 x = XEXP (x, 0);
1692 goto repeat;
1694 case PC:
1695 case CC0: /*FIXME*/
1696 case CONST:
1697 case CONST_INT:
1698 case CONST_DOUBLE:
1699 case CONST_FIXED:
1700 case CONST_VECTOR:
1701 case SYMBOL_REF:
1702 case LABEL_REF:
1703 case ADDR_VEC:
1704 case ADDR_DIFF_VEC:
1705 return;
1707 default:
1708 break;
1711 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1713 if (fmt[i] == 'e')
1715 /* If we are about to do the last recursive call
1716 needed at this level, change it into iteration.
1717 This function is called enough to be worth it. */
1718 if (i == 0)
1720 x = XEXP (x, i);
1721 goto repeat;
1724 compute_transp (XEXP (x, i), indx, bmap);
1726 else if (fmt[i] == 'E')
1727 for (j = 0; j < XVECLEN (x, i); j++)
1728 compute_transp (XVECEXP (x, i, j), indx, bmap);
1732 /* Compute PRE+LCM working variables. */
1734 /* Local properties of expressions. */
1736 /* Nonzero for expressions that are transparent in the block. */
1737 static sbitmap *transp;
1739 /* Nonzero for expressions that are computed (available) in the block. */
1740 static sbitmap *comp;
1742 /* Nonzero for expressions that are locally anticipatable in the block. */
1743 static sbitmap *antloc;
1745 /* Nonzero for expressions where this block is an optimal computation
1746 point. */
1747 static sbitmap *pre_optimal;
1749 /* Nonzero for expressions which are redundant in a particular block. */
1750 static sbitmap *pre_redundant;
1752 /* Nonzero for expressions which should be inserted on a specific edge. */
1753 static sbitmap *pre_insert_map;
1755 /* Nonzero for expressions which should be deleted in a specific block. */
1756 static sbitmap *pre_delete_map;
1758 /* Allocate vars used for PRE analysis. */
1760 static void
1761 alloc_pre_mem (int n_blocks, int n_exprs)
1763 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1765 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1767 pre_optimal = NULL;
1768 pre_redundant = NULL;
1769 pre_insert_map = NULL;
1770 pre_delete_map = NULL;
1771 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1773 /* pre_insert and pre_delete are allocated later. */
1776 /* Free vars used for PRE analysis. */
1778 static void
1779 free_pre_mem (void)
1781 sbitmap_vector_free (transp);
1782 sbitmap_vector_free (comp);
1784 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1786 if (pre_optimal)
1787 sbitmap_vector_free (pre_optimal);
1788 if (pre_redundant)
1789 sbitmap_vector_free (pre_redundant);
1790 if (pre_insert_map)
1791 sbitmap_vector_free (pre_insert_map);
1792 if (pre_delete_map)
1793 sbitmap_vector_free (pre_delete_map);
1795 transp = comp = NULL;
1796 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1799 /* Remove certain expressions from anticipatable and transparent
1800 sets of basic blocks that have incoming abnormal edge.
1801 For PRE remove potentially trapping expressions to avoid placing
1802 them on abnormal edges. For hoisting remove memory references that
1803 can be clobbered by calls. */
1805 static void
1806 prune_expressions (bool pre_p)
1808 sbitmap prune_exprs;
1809 struct expr *expr;
1810 unsigned int ui;
1811 basic_block bb;
1813 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1814 sbitmap_zero (prune_exprs);
1815 for (ui = 0; ui < expr_hash_table.size; ui++)
1817 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1819 /* Note potentially trapping expressions. */
1820 if (may_trap_p (expr->expr))
1822 SET_BIT (prune_exprs, expr->bitmap_index);
1823 continue;
1826 if (!pre_p && MEM_P (expr->expr))
1827 /* Note memory references that can be clobbered by a call.
1828 We do not split abnormal edges in hoisting, so would
1829 a memory reference get hoisted along an abnormal edge,
1830 it would be placed /before/ the call. Therefore, only
1831 constant memory references can be hoisted along abnormal
1832 edges. */
1834 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1835 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1836 continue;
1838 if (MEM_READONLY_P (expr->expr)
1839 && !MEM_VOLATILE_P (expr->expr)
1840 && MEM_NOTRAP_P (expr->expr))
1841 /* Constant memory reference, e.g., a PIC address. */
1842 continue;
1844 /* ??? Optimally, we would use interprocedural alias
1845 analysis to determine if this mem is actually killed
1846 by this call. */
1848 SET_BIT (prune_exprs, expr->bitmap_index);
1853 FOR_EACH_BB (bb)
1855 edge e;
1856 edge_iterator ei;
1858 /* If the current block is the destination of an abnormal edge, we
1859 kill all trapping (for PRE) and memory (for hoist) expressions
1860 because we won't be able to properly place the instruction on
1861 the edge. So make them neither anticipatable nor transparent.
1862 This is fairly conservative.
1864 ??? For hoisting it may be necessary to check for set-and-jump
1865 instructions here, not just for abnormal edges. The general problem
1866 is that when an expression cannot not be placed right at the end of
1867 a basic block we should account for any side-effects of a subsequent
1868 jump instructions that could clobber the expression. It would
1869 be best to implement this check along the lines of
1870 hoist_expr_reaches_here_p where the target block is already known
1871 and, hence, there's no need to conservatively prune expressions on
1872 "intermediate" set-and-jump instructions. */
1873 FOR_EACH_EDGE (e, ei, bb->preds)
1874 if ((e->flags & EDGE_ABNORMAL)
1875 && (pre_p || CALL_P (BB_END (e->src))))
1877 sbitmap_difference (antloc[bb->index],
1878 antloc[bb->index], prune_exprs);
1879 sbitmap_difference (transp[bb->index],
1880 transp[bb->index], prune_exprs);
1881 break;
1885 sbitmap_free (prune_exprs);
1888 /* It may be necessary to insert a large number of insns on edges to
1889 make the existing occurrences of expressions fully redundant. This
1890 routine examines the set of insertions and deletions and if the ratio
1891 of insertions to deletions is too high for a particular expression, then
1892 the expression is removed from the insertion/deletion sets.
1894 N_ELEMS is the number of elements in the hash table. */
1896 static void
1897 prune_insertions_deletions (int n_elems)
1899 sbitmap_iterator sbi;
1900 sbitmap prune_exprs;
1902 /* We always use I to iterate over blocks/edges and J to iterate over
1903 expressions. */
1904 unsigned int i, j;
1906 /* Counts for the number of times an expression needs to be inserted and
1907 number of times an expression can be removed as a result. */
1908 int *insertions = GCNEWVEC (int, n_elems);
1909 int *deletions = GCNEWVEC (int, n_elems);
1911 /* Set of expressions which require too many insertions relative to
1912 the number of deletions achieved. We will prune these out of the
1913 insertion/deletion sets. */
1914 prune_exprs = sbitmap_alloc (n_elems);
1915 sbitmap_zero (prune_exprs);
1917 /* Iterate over the edges counting the number of times each expression
1918 needs to be inserted. */
1919 for (i = 0; i < (unsigned) n_edges; i++)
1921 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1922 insertions[j]++;
1925 /* Similarly for deletions, but those occur in blocks rather than on
1926 edges. */
1927 for (i = 0; i < (unsigned) last_basic_block; i++)
1929 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1930 deletions[j]++;
1933 /* Now that we have accurate counts, iterate over the elements in the
1934 hash table and see if any need too many insertions relative to the
1935 number of evaluations that can be removed. If so, mark them in
1936 PRUNE_EXPRS. */
1937 for (j = 0; j < (unsigned) n_elems; j++)
1938 if (deletions[j]
1939 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1940 SET_BIT (prune_exprs, j);
1942 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1943 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1945 for (i = 0; i < (unsigned) n_edges; i++)
1946 RESET_BIT (pre_insert_map[i], j);
1948 for (i = 0; i < (unsigned) last_basic_block; i++)
1949 RESET_BIT (pre_delete_map[i], j);
1952 sbitmap_free (prune_exprs);
1953 free (insertions);
1954 free (deletions);
1957 /* Top level routine to do the dataflow analysis needed by PRE. */
1959 static struct edge_list *
1960 compute_pre_data (void)
1962 struct edge_list *edge_list;
1963 basic_block bb;
1965 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1966 prune_expressions (true);
1967 sbitmap_vector_zero (ae_kill, last_basic_block);
1969 /* Compute ae_kill for each basic block using:
1971 ~(TRANSP | COMP)
1974 FOR_EACH_BB (bb)
1976 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1977 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1980 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1981 ae_kill, &pre_insert_map, &pre_delete_map);
1982 sbitmap_vector_free (antloc);
1983 antloc = NULL;
1984 sbitmap_vector_free (ae_kill);
1985 ae_kill = NULL;
1987 prune_insertions_deletions (expr_hash_table.n_elems);
1989 return edge_list;
1992 /* PRE utilities */
1994 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1995 block BB.
1997 VISITED is a pointer to a working buffer for tracking which BB's have
1998 been visited. It is NULL for the top-level call.
2000 We treat reaching expressions that go through blocks containing the same
2001 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2002 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2003 2 as not reaching. The intent is to improve the probability of finding
2004 only one reaching expression and to reduce register lifetimes by picking
2005 the closest such expression. */
2007 static int
2008 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2009 basic_block bb, char *visited)
2011 edge pred;
2012 edge_iterator ei;
2014 FOR_EACH_EDGE (pred, ei, bb->preds)
2016 basic_block pred_bb = pred->src;
2018 if (pred->src == ENTRY_BLOCK_PTR
2019 /* Has predecessor has already been visited? */
2020 || visited[pred_bb->index])
2021 ;/* Nothing to do. */
2023 /* Does this predecessor generate this expression? */
2024 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2026 /* Is this the occurrence we're looking for?
2027 Note that there's only one generating occurrence per block
2028 so we just need to check the block number. */
2029 if (occr_bb == pred_bb)
2030 return 1;
2032 visited[pred_bb->index] = 1;
2034 /* Ignore this predecessor if it kills the expression. */
2035 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2036 visited[pred_bb->index] = 1;
2038 /* Neither gen nor kill. */
2039 else
2041 visited[pred_bb->index] = 1;
2042 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2043 return 1;
2047 /* All paths have been checked. */
2048 return 0;
2051 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2052 memory allocated for that function is returned. */
2054 static int
2055 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2057 int rval;
2058 char *visited = XCNEWVEC (char, last_basic_block);
2060 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2062 free (visited);
2063 return rval;
2066 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2068 static rtx
2069 process_insert_insn (struct expr *expr)
2071 rtx reg = expr->reaching_reg;
2072 /* Copy the expression to make sure we don't have any sharing issues. */
2073 rtx exp = copy_rtx (expr->expr);
2074 rtx pat;
2076 start_sequence ();
2078 /* If the expression is something that's an operand, like a constant,
2079 just copy it to a register. */
2080 if (general_operand (exp, GET_MODE (reg)))
2081 emit_move_insn (reg, exp);
2083 /* Otherwise, make a new insn to compute this expression and make sure the
2084 insn will be recognized (this also adds any needed CLOBBERs). */
2085 else
2087 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2089 if (insn_invalid_p (insn, false))
2090 gcc_unreachable ();
2093 pat = get_insns ();
2094 end_sequence ();
2096 return pat;
2099 /* Add EXPR to the end of basic block BB.
2101 This is used by both the PRE and code hoisting. */
2103 static void
2104 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2106 rtx insn = BB_END (bb);
2107 rtx new_insn;
2108 rtx reg = expr->reaching_reg;
2109 int regno = REGNO (reg);
2110 rtx pat, pat_end;
2112 pat = process_insert_insn (expr);
2113 gcc_assert (pat && INSN_P (pat));
2115 pat_end = pat;
2116 while (NEXT_INSN (pat_end) != NULL_RTX)
2117 pat_end = NEXT_INSN (pat_end);
2119 /* If the last insn is a jump, insert EXPR in front [taking care to
2120 handle cc0, etc. properly]. Similarly we need to care trapping
2121 instructions in presence of non-call exceptions. */
2123 if (JUMP_P (insn)
2124 || (NONJUMP_INSN_P (insn)
2125 && (!single_succ_p (bb)
2126 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2128 #ifdef HAVE_cc0
2129 rtx note;
2130 #endif
2132 /* If this is a jump table, then we can't insert stuff here. Since
2133 we know the previous real insn must be the tablejump, we insert
2134 the new instruction just before the tablejump. */
2135 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2136 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2137 insn = prev_active_insn (insn);
2139 #ifdef HAVE_cc0
2140 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2141 if cc0 isn't set. */
2142 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2143 if (note)
2144 insn = XEXP (note, 0);
2145 else
2147 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2148 if (maybe_cc0_setter
2149 && INSN_P (maybe_cc0_setter)
2150 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2151 insn = maybe_cc0_setter;
2153 #endif
2154 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2155 new_insn = emit_insn_before_noloc (pat, insn, bb);
2158 /* Likewise if the last insn is a call, as will happen in the presence
2159 of exception handling. */
2160 else if (CALL_P (insn)
2161 && (!single_succ_p (bb)
2162 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2164 /* Keeping in mind targets with small register classes and parameters
2165 in registers, we search backward and place the instructions before
2166 the first parameter is loaded. Do this for everyone for consistency
2167 and a presumption that we'll get better code elsewhere as well. */
2169 /* Since different machines initialize their parameter registers
2170 in different orders, assume nothing. Collect the set of all
2171 parameter registers. */
2172 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2174 /* If we found all the parameter loads, then we want to insert
2175 before the first parameter load.
2177 If we did not find all the parameter loads, then we might have
2178 stopped on the head of the block, which could be a CODE_LABEL.
2179 If we inserted before the CODE_LABEL, then we would be putting
2180 the insn in the wrong basic block. In that case, put the insn
2181 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2182 while (LABEL_P (insn)
2183 || NOTE_INSN_BASIC_BLOCK_P (insn))
2184 insn = NEXT_INSN (insn);
2186 new_insn = emit_insn_before_noloc (pat, insn, bb);
2188 else
2189 new_insn = emit_insn_after_noloc (pat, insn, bb);
2191 while (1)
2193 if (INSN_P (pat))
2194 add_label_notes (PATTERN (pat), new_insn);
2195 if (pat == pat_end)
2196 break;
2197 pat = NEXT_INSN (pat);
2200 gcse_create_count++;
2202 if (dump_file)
2204 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2205 bb->index, INSN_UID (new_insn));
2206 fprintf (dump_file, "copying expression %d to reg %d\n",
2207 expr->bitmap_index, regno);
2211 /* Insert partially redundant expressions on edges in the CFG to make
2212 the expressions fully redundant. */
2214 static int
2215 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2217 int e, i, j, num_edges, set_size, did_insert = 0;
2218 sbitmap *inserted;
2220 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2221 if it reaches any of the deleted expressions. */
2223 set_size = pre_insert_map[0]->size;
2224 num_edges = NUM_EDGES (edge_list);
2225 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2226 sbitmap_vector_zero (inserted, num_edges);
2228 for (e = 0; e < num_edges; e++)
2230 int indx;
2231 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2233 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2235 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2237 for (j = indx;
2238 insert && j < (int) expr_hash_table.n_elems;
2239 j++, insert >>= 1)
2240 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2242 struct expr *expr = index_map[j];
2243 struct occr *occr;
2245 /* Now look at each deleted occurrence of this expression. */
2246 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2248 if (! occr->deleted_p)
2249 continue;
2251 /* Insert this expression on this edge if it would
2252 reach the deleted occurrence in BB. */
2253 if (!TEST_BIT (inserted[e], j))
2255 rtx insn;
2256 edge eg = INDEX_EDGE (edge_list, e);
2258 /* We can't insert anything on an abnormal and
2259 critical edge, so we insert the insn at the end of
2260 the previous block. There are several alternatives
2261 detailed in Morgans book P277 (sec 10.5) for
2262 handling this situation. This one is easiest for
2263 now. */
2265 if (eg->flags & EDGE_ABNORMAL)
2266 insert_insn_end_basic_block (index_map[j], bb);
2267 else
2269 insn = process_insert_insn (index_map[j]);
2270 insert_insn_on_edge (insn, eg);
2273 if (dump_file)
2275 fprintf (dump_file, "PRE: edge (%d,%d), ",
2276 bb->index,
2277 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2278 fprintf (dump_file, "copy expression %d\n",
2279 expr->bitmap_index);
2282 update_ld_motion_stores (expr);
2283 SET_BIT (inserted[e], j);
2284 did_insert = 1;
2285 gcse_create_count++;
2292 sbitmap_vector_free (inserted);
2293 return did_insert;
2296 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2297 Given "old_reg <- expr" (INSN), instead of adding after it
2298 reaching_reg <- old_reg
2299 it's better to do the following:
2300 reaching_reg <- expr
2301 old_reg <- reaching_reg
2302 because this way copy propagation can discover additional PRE
2303 opportunities. But if this fails, we try the old way.
2304 When "expr" is a store, i.e.
2305 given "MEM <- old_reg", instead of adding after it
2306 reaching_reg <- old_reg
2307 it's better to add it before as follows:
2308 reaching_reg <- old_reg
2309 MEM <- reaching_reg. */
2311 static void
2312 pre_insert_copy_insn (struct expr *expr, rtx insn)
2314 rtx reg = expr->reaching_reg;
2315 int regno = REGNO (reg);
2316 int indx = expr->bitmap_index;
2317 rtx pat = PATTERN (insn);
2318 rtx set, first_set, new_insn;
2319 rtx old_reg;
2320 int i;
2322 /* This block matches the logic in hash_scan_insn. */
2323 switch (GET_CODE (pat))
2325 case SET:
2326 set = pat;
2327 break;
2329 case PARALLEL:
2330 /* Search through the parallel looking for the set whose
2331 source was the expression that we're interested in. */
2332 first_set = NULL_RTX;
2333 set = NULL_RTX;
2334 for (i = 0; i < XVECLEN (pat, 0); i++)
2336 rtx x = XVECEXP (pat, 0, i);
2337 if (GET_CODE (x) == SET)
2339 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2340 may not find an equivalent expression, but in this
2341 case the PARALLEL will have a single set. */
2342 if (first_set == NULL_RTX)
2343 first_set = x;
2344 if (expr_equiv_p (SET_SRC (x), expr->expr))
2346 set = x;
2347 break;
2352 gcc_assert (first_set);
2353 if (set == NULL_RTX)
2354 set = first_set;
2355 break;
2357 default:
2358 gcc_unreachable ();
2361 if (REG_P (SET_DEST (set)))
2363 old_reg = SET_DEST (set);
2364 /* Check if we can modify the set destination in the original insn. */
2365 if (validate_change (insn, &SET_DEST (set), reg, 0))
2367 new_insn = gen_move_insn (old_reg, reg);
2368 new_insn = emit_insn_after (new_insn, insn);
2370 else
2372 new_insn = gen_move_insn (reg, old_reg);
2373 new_insn = emit_insn_after (new_insn, insn);
2376 else /* This is possible only in case of a store to memory. */
2378 old_reg = SET_SRC (set);
2379 new_insn = gen_move_insn (reg, old_reg);
2381 /* Check if we can modify the set source in the original insn. */
2382 if (validate_change (insn, &SET_SRC (set), reg, 0))
2383 new_insn = emit_insn_before (new_insn, insn);
2384 else
2385 new_insn = emit_insn_after (new_insn, insn);
2388 gcse_create_count++;
2390 if (dump_file)
2391 fprintf (dump_file,
2392 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2393 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2394 INSN_UID (insn), regno);
2397 /* Copy available expressions that reach the redundant expression
2398 to `reaching_reg'. */
2400 static void
2401 pre_insert_copies (void)
2403 unsigned int i, added_copy;
2404 struct expr *expr;
2405 struct occr *occr;
2406 struct occr *avail;
2408 /* For each available expression in the table, copy the result to
2409 `reaching_reg' if the expression reaches a deleted one.
2411 ??? The current algorithm is rather brute force.
2412 Need to do some profiling. */
2414 for (i = 0; i < expr_hash_table.size; i++)
2415 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2417 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2418 we don't want to insert a copy here because the expression may not
2419 really be redundant. So only insert an insn if the expression was
2420 deleted. This test also avoids further processing if the
2421 expression wasn't deleted anywhere. */
2422 if (expr->reaching_reg == NULL)
2423 continue;
2425 /* Set when we add a copy for that expression. */
2426 added_copy = 0;
2428 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2430 if (! occr->deleted_p)
2431 continue;
2433 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2435 rtx insn = avail->insn;
2437 /* No need to handle this one if handled already. */
2438 if (avail->copied_p)
2439 continue;
2441 /* Don't handle this one if it's a redundant one. */
2442 if (INSN_DELETED_P (insn))
2443 continue;
2445 /* Or if the expression doesn't reach the deleted one. */
2446 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2447 expr,
2448 BLOCK_FOR_INSN (occr->insn)))
2449 continue;
2451 added_copy = 1;
2453 /* Copy the result of avail to reaching_reg. */
2454 pre_insert_copy_insn (expr, insn);
2455 avail->copied_p = 1;
2459 if (added_copy)
2460 update_ld_motion_stores (expr);
2464 /* Emit move from SRC to DEST noting the equivalence with expression computed
2465 in INSN. */
2467 static rtx
2468 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2470 rtx new_rtx;
2471 rtx set = single_set (insn), set2;
2472 rtx note;
2473 rtx eqv;
2475 /* This should never fail since we're creating a reg->reg copy
2476 we've verified to be valid. */
2478 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2480 /* Note the equivalence for local CSE pass. */
2481 set2 = single_set (new_rtx);
2482 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2483 return new_rtx;
2484 if ((note = find_reg_equal_equiv_note (insn)))
2485 eqv = XEXP (note, 0);
2486 else
2487 eqv = SET_SRC (set);
2489 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2491 return new_rtx;
2494 /* Delete redundant computations.
2495 Deletion is done by changing the insn to copy the `reaching_reg' of
2496 the expression into the result of the SET. It is left to later passes
2497 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2499 Return nonzero if a change is made. */
2501 static int
2502 pre_delete (void)
2504 unsigned int i;
2505 int changed;
2506 struct expr *expr;
2507 struct occr *occr;
2509 changed = 0;
2510 for (i = 0; i < expr_hash_table.size; i++)
2511 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2513 int indx = expr->bitmap_index;
2515 /* We only need to search antic_occr since we require ANTLOC != 0. */
2516 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2518 rtx insn = occr->insn;
2519 rtx set;
2520 basic_block bb = BLOCK_FOR_INSN (insn);
2522 /* We only delete insns that have a single_set. */
2523 if (TEST_BIT (pre_delete_map[bb->index], indx)
2524 && (set = single_set (insn)) != 0
2525 && dbg_cnt (pre_insn))
2527 /* Create a pseudo-reg to store the result of reaching
2528 expressions into. Get the mode for the new pseudo from
2529 the mode of the original destination pseudo. */
2530 if (expr->reaching_reg == NULL)
2531 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2533 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2534 delete_insn (insn);
2535 occr->deleted_p = 1;
2536 changed = 1;
2537 gcse_subst_count++;
2539 if (dump_file)
2541 fprintf (dump_file,
2542 "PRE: redundant insn %d (expression %d) in ",
2543 INSN_UID (insn), indx);
2544 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2545 bb->index, REGNO (expr->reaching_reg));
2551 return changed;
2554 /* Perform GCSE optimizations using PRE.
2555 This is called by one_pre_gcse_pass after all the dataflow analysis
2556 has been done.
2558 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2559 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2560 Compiler Design and Implementation.
2562 ??? A new pseudo reg is created to hold the reaching expression. The nice
2563 thing about the classical approach is that it would try to use an existing
2564 reg. If the register can't be adequately optimized [i.e. we introduce
2565 reload problems], one could add a pass here to propagate the new register
2566 through the block.
2568 ??? We don't handle single sets in PARALLELs because we're [currently] not
2569 able to copy the rest of the parallel when we insert copies to create full
2570 redundancies from partial redundancies. However, there's no reason why we
2571 can't handle PARALLELs in the cases where there are no partial
2572 redundancies. */
2574 static int
2575 pre_gcse (struct edge_list *edge_list)
2577 unsigned int i;
2578 int did_insert, changed;
2579 struct expr **index_map;
2580 struct expr *expr;
2582 /* Compute a mapping from expression number (`bitmap_index') to
2583 hash table entry. */
2585 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2586 for (i = 0; i < expr_hash_table.size; i++)
2587 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2588 index_map[expr->bitmap_index] = expr;
2590 /* Delete the redundant insns first so that
2591 - we know what register to use for the new insns and for the other
2592 ones with reaching expressions
2593 - we know which insns are redundant when we go to create copies */
2595 changed = pre_delete ();
2596 did_insert = pre_edge_insert (edge_list, index_map);
2598 /* In other places with reaching expressions, copy the expression to the
2599 specially allocated pseudo-reg that reaches the redundant expr. */
2600 pre_insert_copies ();
2601 if (did_insert)
2603 commit_edge_insertions ();
2604 changed = 1;
2607 free (index_map);
2608 return changed;
2611 /* Top level routine to perform one PRE GCSE pass.
2613 Return nonzero if a change was made. */
2615 static int
2616 one_pre_gcse_pass (void)
2618 int changed = 0;
2620 gcse_subst_count = 0;
2621 gcse_create_count = 0;
2623 /* Return if there's nothing to do, or it is too expensive. */
2624 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2625 || is_too_expensive (_("PRE disabled")))
2626 return 0;
2628 /* We need alias. */
2629 init_alias_analysis ();
2631 bytes_used = 0;
2632 gcc_obstack_init (&gcse_obstack);
2633 alloc_gcse_mem ();
2635 alloc_hash_table (&expr_hash_table);
2636 add_noreturn_fake_exit_edges ();
2637 if (flag_gcse_lm)
2638 compute_ld_motion_mems ();
2640 compute_hash_table (&expr_hash_table);
2641 if (flag_gcse_lm)
2642 trim_ld_motion_mems ();
2643 if (dump_file)
2644 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2646 if (expr_hash_table.n_elems > 0)
2648 struct edge_list *edge_list;
2649 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2650 edge_list = compute_pre_data ();
2651 changed |= pre_gcse (edge_list);
2652 free_edge_list (edge_list);
2653 free_pre_mem ();
2656 if (flag_gcse_lm)
2657 free_ld_motion_mems ();
2658 remove_fake_exit_edges ();
2659 free_hash_table (&expr_hash_table);
2661 free_gcse_mem ();
2662 obstack_free (&gcse_obstack, NULL);
2664 /* We are finished with alias. */
2665 end_alias_analysis ();
2667 if (dump_file)
2669 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2670 current_function_name (), n_basic_blocks, bytes_used);
2671 fprintf (dump_file, "%d substs, %d insns created\n",
2672 gcse_subst_count, gcse_create_count);
2675 return changed;
2678 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2679 to INSN. If such notes are added to an insn which references a
2680 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2681 that note, because the following loop optimization pass requires
2682 them. */
2684 /* ??? If there was a jump optimization pass after gcse and before loop,
2685 then we would not need to do this here, because jump would add the
2686 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2688 static void
2689 add_label_notes (rtx x, rtx insn)
2691 enum rtx_code code = GET_CODE (x);
2692 int i, j;
2693 const char *fmt;
2695 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2697 /* This code used to ignore labels that referred to dispatch tables to
2698 avoid flow generating (slightly) worse code.
2700 We no longer ignore such label references (see LABEL_REF handling in
2701 mark_jump_label for additional information). */
2703 /* There's no reason for current users to emit jump-insns with
2704 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2705 notes. */
2706 gcc_assert (!JUMP_P (insn));
2707 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2709 if (LABEL_P (XEXP (x, 0)))
2710 LABEL_NUSES (XEXP (x, 0))++;
2712 return;
2715 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2717 if (fmt[i] == 'e')
2718 add_label_notes (XEXP (x, i), insn);
2719 else if (fmt[i] == 'E')
2720 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2721 add_label_notes (XVECEXP (x, i, j), insn);
2725 /* Code Hoisting variables and subroutines. */
2727 /* Very busy expressions. */
2728 static sbitmap *hoist_vbein;
2729 static sbitmap *hoist_vbeout;
2731 /* ??? We could compute post dominators and run this algorithm in
2732 reverse to perform tail merging, doing so would probably be
2733 more effective than the tail merging code in jump.c.
2735 It's unclear if tail merging could be run in parallel with
2736 code hoisting. It would be nice. */
2738 /* Allocate vars used for code hoisting analysis. */
2740 static void
2741 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2743 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2744 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2745 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2747 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2748 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2751 /* Free vars used for code hoisting analysis. */
2753 static void
2754 free_code_hoist_mem (void)
2756 sbitmap_vector_free (antloc);
2757 sbitmap_vector_free (transp);
2758 sbitmap_vector_free (comp);
2760 sbitmap_vector_free (hoist_vbein);
2761 sbitmap_vector_free (hoist_vbeout);
2763 free_dominance_info (CDI_DOMINATORS);
2766 /* Compute the very busy expressions at entry/exit from each block.
2768 An expression is very busy if all paths from a given point
2769 compute the expression. */
2771 static void
2772 compute_code_hoist_vbeinout (void)
2774 int changed, passes;
2775 basic_block bb;
2777 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2778 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2780 passes = 0;
2781 changed = 1;
2783 while (changed)
2785 changed = 0;
2787 /* We scan the blocks in the reverse order to speed up
2788 the convergence. */
2789 FOR_EACH_BB_REVERSE (bb)
2791 if (bb->next_bb != EXIT_BLOCK_PTR)
2793 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2794 hoist_vbein, bb->index);
2796 /* Include expressions in VBEout that are calculated
2797 in BB and available at its end. */
2798 sbitmap_a_or_b (hoist_vbeout[bb->index],
2799 hoist_vbeout[bb->index], comp[bb->index]);
2802 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2803 antloc[bb->index],
2804 hoist_vbeout[bb->index],
2805 transp[bb->index]);
2808 passes++;
2811 if (dump_file)
2813 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2815 FOR_EACH_BB (bb)
2817 fprintf (dump_file, "vbein (%d): ", bb->index);
2818 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2819 fprintf (dump_file, "vbeout(%d): ", bb->index);
2820 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2825 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2827 static void
2828 compute_code_hoist_data (void)
2830 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2831 prune_expressions (false);
2832 compute_code_hoist_vbeinout ();
2833 calculate_dominance_info (CDI_DOMINATORS);
2834 if (dump_file)
2835 fprintf (dump_file, "\n");
2838 /* Determine if the expression identified by EXPR_INDEX would
2839 reach BB unimpared if it was placed at the end of EXPR_BB.
2840 Stop the search if the expression would need to be moved more
2841 than DISTANCE instructions.
2843 It's unclear exactly what Muchnick meant by "unimpared". It seems
2844 to me that the expression must either be computed or transparent in
2845 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2846 would allow the expression to be hoisted out of loops, even if
2847 the expression wasn't a loop invariant.
2849 Contrast this to reachability for PRE where an expression is
2850 considered reachable if *any* path reaches instead of *all*
2851 paths. */
2853 static int
2854 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2855 char *visited, int distance, int *bb_size)
2857 edge pred;
2858 edge_iterator ei;
2859 int visited_allocated_locally = 0;
2861 /* Terminate the search if distance, for which EXPR is allowed to move,
2862 is exhausted. */
2863 if (distance > 0)
2865 distance -= bb_size[bb->index];
2867 if (distance <= 0)
2868 return 0;
2870 else
2871 gcc_assert (distance == 0);
2873 if (visited == NULL)
2875 visited_allocated_locally = 1;
2876 visited = XCNEWVEC (char, last_basic_block);
2879 FOR_EACH_EDGE (pred, ei, bb->preds)
2881 basic_block pred_bb = pred->src;
2883 if (pred->src == ENTRY_BLOCK_PTR)
2884 break;
2885 else if (pred_bb == expr_bb)
2886 continue;
2887 else if (visited[pred_bb->index])
2888 continue;
2890 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2891 break;
2893 /* Not killed. */
2894 else
2896 visited[pred_bb->index] = 1;
2897 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2898 visited, distance, bb_size))
2899 break;
2902 if (visited_allocated_locally)
2903 free (visited);
2905 return (pred == NULL);
2908 /* Find occurrence in BB. */
2910 static struct occr *
2911 find_occr_in_bb (struct occr *occr, basic_block bb)
2913 /* Find the right occurrence of this expression. */
2914 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2915 occr = occr->next;
2917 return occr;
2920 /* Actually perform code hoisting. */
2922 static int
2923 hoist_code (void)
2925 basic_block bb, dominated;
2926 VEC (basic_block, heap) *dom_tree_walk;
2927 unsigned int dom_tree_walk_index;
2928 VEC (basic_block, heap) *domby;
2929 unsigned int i,j;
2930 struct expr **index_map;
2931 struct expr *expr;
2932 int *to_bb_head;
2933 int *bb_size;
2934 int changed = 0;
2936 /* Compute a mapping from expression number (`bitmap_index') to
2937 hash table entry. */
2939 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2940 for (i = 0; i < expr_hash_table.size; i++)
2941 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2942 index_map[expr->bitmap_index] = expr;
2944 /* Calculate sizes of basic blocks and note how far
2945 each instruction is from the start of its block. We then use this
2946 data to restrict distance an expression can travel. */
2948 to_bb_head = XCNEWVEC (int, get_max_uid ());
2949 bb_size = XCNEWVEC (int, last_basic_block);
2951 FOR_EACH_BB (bb)
2953 rtx insn;
2954 int to_head;
2956 to_head = 0;
2957 FOR_BB_INSNS (bb, insn)
2959 /* Don't count debug instructions to avoid them affecting
2960 decision choices. */
2961 if (NONDEBUG_INSN_P (insn))
2962 to_bb_head[INSN_UID (insn)] = to_head++;
2965 bb_size[bb->index] = to_head;
2968 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2969 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2970 == ENTRY_BLOCK_PTR->next_bb));
2972 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2973 ENTRY_BLOCK_PTR->next_bb);
2975 /* Walk over each basic block looking for potentially hoistable
2976 expressions, nothing gets hoisted from the entry block. */
2977 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2979 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2981 if (VEC_length (basic_block, domby) == 0)
2982 continue;
2984 /* Examine each expression that is very busy at the exit of this
2985 block. These are the potentially hoistable expressions. */
2986 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
2988 if (TEST_BIT (hoist_vbeout[bb->index], i))
2990 /* Current expression. */
2991 struct expr *expr = index_map[i];
2992 /* Number of occurrences of EXPR that can be hoisted to BB. */
2993 int hoistable = 0;
2994 /* Basic blocks that have occurrences reachable from BB. */
2995 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2996 /* Occurrences reachable from BB. */
2997 VEC (occr_t, heap) *occrs_to_hoist = NULL;
2998 /* We want to insert the expression into BB only once, so
2999 note when we've inserted it. */
3000 int insn_inserted_p;
3001 occr_t occr;
3003 bitmap_initialize (from_bbs, 0);
3005 /* If an expression is computed in BB and is available at end of
3006 BB, hoist all occurrences dominated by BB to BB. */
3007 if (TEST_BIT (comp[bb->index], i))
3009 occr = find_occr_in_bb (expr->antic_occr, bb);
3011 if (occr)
3013 /* An occurrence might've been already deleted
3014 while processing a dominator of BB. */
3015 if (!occr->deleted_p)
3017 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3018 hoistable++;
3021 else
3022 hoistable++;
3025 /* We've found a potentially hoistable expression, now
3026 we look at every block BB dominates to see if it
3027 computes the expression. */
3028 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3030 int max_distance;
3032 /* Ignore self dominance. */
3033 if (bb == dominated)
3034 continue;
3035 /* We've found a dominated block, now see if it computes
3036 the busy expression and whether or not moving that
3037 expression to the "beginning" of that block is safe. */
3038 if (!TEST_BIT (antloc[dominated->index], i))
3039 continue;
3041 occr = find_occr_in_bb (expr->antic_occr, dominated);
3042 gcc_assert (occr);
3044 /* An occurrence might've been already deleted
3045 while processing a dominator of BB. */
3046 if (occr->deleted_p)
3047 continue;
3048 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3050 max_distance = expr->max_distance;
3051 if (max_distance > 0)
3052 /* Adjust MAX_DISTANCE to account for the fact that
3053 OCCR won't have to travel all of DOMINATED, but
3054 only part of it. */
3055 max_distance += (bb_size[dominated->index]
3056 - to_bb_head[INSN_UID (occr->insn)]);
3058 /* Note if the expression would reach the dominated block
3059 unimpared if it was placed at the end of BB.
3061 Keep track of how many times this expression is hoistable
3062 from a dominated block into BB. */
3063 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3064 max_distance, bb_size))
3066 hoistable++;
3067 VEC_safe_push (occr_t, heap,
3068 occrs_to_hoist, occr);
3069 bitmap_set_bit (from_bbs, dominated->index);
3073 /* If we found more than one hoistable occurrence of this
3074 expression, then note it in the vector of expressions to
3075 hoist. It makes no sense to hoist things which are computed
3076 in only one BB, and doing so tends to pessimize register
3077 allocation. One could increase this value to try harder
3078 to avoid any possible code expansion due to register
3079 allocation issues; however experiments have shown that
3080 the vast majority of hoistable expressions are only movable
3081 from two successors, so raising this threshold is likely
3082 to nullify any benefit we get from code hoisting. */
3083 if (hoistable > 1 && dbg_cnt (hoist_insn))
3085 /* If (hoistable != VEC_length), then there is
3086 an occurrence of EXPR in BB itself. Don't waste
3087 time looking for LCA in this case. */
3088 if ((unsigned) hoistable
3089 == VEC_length (occr_t, occrs_to_hoist))
3091 basic_block lca;
3093 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3094 from_bbs);
3095 if (lca != bb)
3096 /* Punt, it's better to hoist these occurrences to
3097 LCA. */
3098 VEC_free (occr_t, heap, occrs_to_hoist);
3101 else
3102 /* Punt, no point hoisting a single occurence. */
3103 VEC_free (occr_t, heap, occrs_to_hoist);
3105 insn_inserted_p = 0;
3107 /* Walk through occurrences of I'th expressions we want
3108 to hoist to BB and make the transformations. */
3109 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3111 rtx insn;
3112 rtx set;
3114 gcc_assert (!occr->deleted_p);
3116 insn = occr->insn;
3117 set = single_set (insn);
3118 gcc_assert (set);
3120 /* Create a pseudo-reg to store the result of reaching
3121 expressions into. Get the mode for the new pseudo
3122 from the mode of the original destination pseudo.
3124 It is important to use new pseudos whenever we
3125 emit a set. This will allow reload to use
3126 rematerialization for such registers. */
3127 if (!insn_inserted_p)
3128 expr->reaching_reg
3129 = gen_reg_rtx_and_attrs (SET_DEST (set));
3131 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3132 insn);
3133 delete_insn (insn);
3134 occr->deleted_p = 1;
3135 changed = 1;
3136 gcse_subst_count++;
3138 if (!insn_inserted_p)
3140 insert_insn_end_basic_block (expr, bb);
3141 insn_inserted_p = 1;
3145 VEC_free (occr_t, heap, occrs_to_hoist);
3146 bitmap_clear (from_bbs);
3149 VEC_free (basic_block, heap, domby);
3152 VEC_free (basic_block, heap, dom_tree_walk);
3153 free (bb_size);
3154 free (to_bb_head);
3155 free (index_map);
3157 return changed;
3160 /* Top level routine to perform one code hoisting (aka unification) pass
3162 Return nonzero if a change was made. */
3164 static int
3165 one_code_hoisting_pass (void)
3167 int changed = 0;
3169 gcse_subst_count = 0;
3170 gcse_create_count = 0;
3172 /* Return if there's nothing to do, or it is too expensive. */
3173 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3174 || is_too_expensive (_("GCSE disabled")))
3175 return 0;
3177 doing_code_hoisting_p = true;
3179 /* We need alias. */
3180 init_alias_analysis ();
3182 bytes_used = 0;
3183 gcc_obstack_init (&gcse_obstack);
3184 alloc_gcse_mem ();
3186 alloc_hash_table (&expr_hash_table);
3187 compute_hash_table (&expr_hash_table);
3188 if (dump_file)
3189 dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3191 if (expr_hash_table.n_elems > 0)
3193 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3194 compute_code_hoist_data ();
3195 changed = hoist_code ();
3196 free_code_hoist_mem ();
3199 free_hash_table (&expr_hash_table);
3200 free_gcse_mem ();
3201 obstack_free (&gcse_obstack, NULL);
3203 /* We are finished with alias. */
3204 end_alias_analysis ();
3206 if (dump_file)
3208 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3209 current_function_name (), n_basic_blocks, bytes_used);
3210 fprintf (dump_file, "%d substs, %d insns created\n",
3211 gcse_subst_count, gcse_create_count);
3214 doing_code_hoisting_p = false;
3216 return changed;
3219 /* Here we provide the things required to do store motion towards the exit.
3220 In order for this to be effective, gcse also needed to be taught how to
3221 move a load when it is killed only by a store to itself.
3223 int i;
3224 float a[10];
3226 void foo(float scale)
3228 for (i=0; i<10; i++)
3229 a[i] *= scale;
3232 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3233 the load out since its live around the loop, and stored at the bottom
3234 of the loop.
3236 The 'Load Motion' referred to and implemented in this file is
3237 an enhancement to gcse which when using edge based LCM, recognizes
3238 this situation and allows gcse to move the load out of the loop.
3240 Once gcse has hoisted the load, store motion can then push this
3241 load towards the exit, and we end up with no loads or stores of 'i'
3242 in the loop. */
3244 static hashval_t
3245 pre_ldst_expr_hash (const void *p)
3247 int do_not_record_p = 0;
3248 const struct ls_expr *const x = (const struct ls_expr *) p;
3249 return
3250 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3253 static int
3254 pre_ldst_expr_eq (const void *p1, const void *p2)
3256 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3257 *const ptr2 = (const struct ls_expr *) p2;
3258 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3261 /* This will search the ldst list for a matching expression. If it
3262 doesn't find one, we create one and initialize it. */
3264 static struct ls_expr *
3265 ldst_entry (rtx x)
3267 int do_not_record_p = 0;
3268 struct ls_expr * ptr;
3269 unsigned int hash;
3270 void **slot;
3271 struct ls_expr e;
3273 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3274 NULL, /*have_reg_qty=*/false);
3276 e.pattern = x;
3277 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3278 if (*slot)
3279 return (struct ls_expr *)*slot;
3281 ptr = XNEW (struct ls_expr);
3283 ptr->next = pre_ldst_mems;
3284 ptr->expr = NULL;
3285 ptr->pattern = x;
3286 ptr->pattern_regs = NULL_RTX;
3287 ptr->loads = NULL_RTX;
3288 ptr->stores = NULL_RTX;
3289 ptr->reaching_reg = NULL_RTX;
3290 ptr->invalid = 0;
3291 ptr->index = 0;
3292 ptr->hash_index = hash;
3293 pre_ldst_mems = ptr;
3294 *slot = ptr;
3296 return ptr;
3299 /* Free up an individual ldst entry. */
3301 static void
3302 free_ldst_entry (struct ls_expr * ptr)
3304 free_INSN_LIST_list (& ptr->loads);
3305 free_INSN_LIST_list (& ptr->stores);
3307 free (ptr);
3310 /* Free up all memory associated with the ldst list. */
3312 static void
3313 free_ld_motion_mems (void)
3315 if (pre_ldst_table)
3316 htab_delete (pre_ldst_table);
3317 pre_ldst_table = NULL;
3319 while (pre_ldst_mems)
3321 struct ls_expr * tmp = pre_ldst_mems;
3323 pre_ldst_mems = pre_ldst_mems->next;
3325 free_ldst_entry (tmp);
3328 pre_ldst_mems = NULL;
3331 /* Dump debugging info about the ldst list. */
3333 static void
3334 print_ldst_list (FILE * file)
3336 struct ls_expr * ptr;
3338 fprintf (file, "LDST list: \n");
3340 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3342 fprintf (file, " Pattern (%3d): ", ptr->index);
3344 print_rtl (file, ptr->pattern);
3346 fprintf (file, "\n Loads : ");
3348 if (ptr->loads)
3349 print_rtl (file, ptr->loads);
3350 else
3351 fprintf (file, "(nil)");
3353 fprintf (file, "\n Stores : ");
3355 if (ptr->stores)
3356 print_rtl (file, ptr->stores);
3357 else
3358 fprintf (file, "(nil)");
3360 fprintf (file, "\n\n");
3363 fprintf (file, "\n");
3366 /* Returns 1 if X is in the list of ldst only expressions. */
3368 static struct ls_expr *
3369 find_rtx_in_ldst (rtx x)
3371 struct ls_expr e;
3372 void **slot;
3373 if (!pre_ldst_table)
3374 return NULL;
3375 e.pattern = x;
3376 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3377 if (!slot || ((struct ls_expr *)*slot)->invalid)
3378 return NULL;
3379 return (struct ls_expr *) *slot;
3382 /* Load Motion for loads which only kill themselves. */
3384 /* Return true if x, a MEM, is a simple access with no side effects.
3385 These are the types of loads we consider for the ld_motion list,
3386 otherwise we let the usual aliasing take care of it. */
3388 static int
3389 simple_mem (const_rtx x)
3391 if (MEM_VOLATILE_P (x))
3392 return 0;
3394 if (GET_MODE (x) == BLKmode)
3395 return 0;
3397 /* If we are handling exceptions, we must be careful with memory references
3398 that may trap. If we are not, the behavior is undefined, so we may just
3399 continue. */
3400 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3401 return 0;
3403 if (side_effects_p (x))
3404 return 0;
3406 /* Do not consider function arguments passed on stack. */
3407 if (reg_mentioned_p (stack_pointer_rtx, x))
3408 return 0;
3410 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3411 return 0;
3413 return 1;
3416 /* Make sure there isn't a buried reference in this pattern anywhere.
3417 If there is, invalidate the entry for it since we're not capable
3418 of fixing it up just yet.. We have to be sure we know about ALL
3419 loads since the aliasing code will allow all entries in the
3420 ld_motion list to not-alias itself. If we miss a load, we will get
3421 the wrong value since gcse might common it and we won't know to
3422 fix it up. */
3424 static void
3425 invalidate_any_buried_refs (rtx x)
3427 const char * fmt;
3428 int i, j;
3429 struct ls_expr * ptr;
3431 /* Invalidate it in the list. */
3432 if (MEM_P (x) && simple_mem (x))
3434 ptr = ldst_entry (x);
3435 ptr->invalid = 1;
3438 /* Recursively process the insn. */
3439 fmt = GET_RTX_FORMAT (GET_CODE (x));
3441 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3443 if (fmt[i] == 'e')
3444 invalidate_any_buried_refs (XEXP (x, i));
3445 else if (fmt[i] == 'E')
3446 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3447 invalidate_any_buried_refs (XVECEXP (x, i, j));
3451 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3452 being defined as MEM loads and stores to symbols, with no side effects
3453 and no registers in the expression. For a MEM destination, we also
3454 check that the insn is still valid if we replace the destination with a
3455 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3456 which don't match this criteria, they are invalidated and trimmed out
3457 later. */
3459 static void
3460 compute_ld_motion_mems (void)
3462 struct ls_expr * ptr;
3463 basic_block bb;
3464 rtx insn;
3466 pre_ldst_mems = NULL;
3467 pre_ldst_table
3468 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3470 FOR_EACH_BB (bb)
3472 FOR_BB_INSNS (bb, insn)
3474 if (NONDEBUG_INSN_P (insn))
3476 if (GET_CODE (PATTERN (insn)) == SET)
3478 rtx src = SET_SRC (PATTERN (insn));
3479 rtx dest = SET_DEST (PATTERN (insn));
3481 /* Check for a simple LOAD... */
3482 if (MEM_P (src) && simple_mem (src))
3484 ptr = ldst_entry (src);
3485 if (REG_P (dest))
3486 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3487 else
3488 ptr->invalid = 1;
3490 else
3492 /* Make sure there isn't a buried load somewhere. */
3493 invalidate_any_buried_refs (src);
3496 /* Check for stores. Don't worry about aliased ones, they
3497 will block any movement we might do later. We only care
3498 about this exact pattern since those are the only
3499 circumstance that we will ignore the aliasing info. */
3500 if (MEM_P (dest) && simple_mem (dest))
3502 ptr = ldst_entry (dest);
3504 if (! MEM_P (src)
3505 && GET_CODE (src) != ASM_OPERANDS
3506 /* Check for REG manually since want_to_gcse_p
3507 returns 0 for all REGs. */
3508 && can_assign_to_reg_without_clobbers_p (src))
3509 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3510 else
3511 ptr->invalid = 1;
3514 else
3515 invalidate_any_buried_refs (PATTERN (insn));
3521 /* Remove any references that have been either invalidated or are not in the
3522 expression list for pre gcse. */
3524 static void
3525 trim_ld_motion_mems (void)
3527 struct ls_expr * * last = & pre_ldst_mems;
3528 struct ls_expr * ptr = pre_ldst_mems;
3530 while (ptr != NULL)
3532 struct expr * expr;
3534 /* Delete if entry has been made invalid. */
3535 if (! ptr->invalid)
3537 /* Delete if we cannot find this mem in the expression list. */
3538 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3540 for (expr = expr_hash_table.table[hash];
3541 expr != NULL;
3542 expr = expr->next_same_hash)
3543 if (expr_equiv_p (expr->expr, ptr->pattern))
3544 break;
3546 else
3547 expr = (struct expr *) 0;
3549 if (expr)
3551 /* Set the expression field if we are keeping it. */
3552 ptr->expr = expr;
3553 last = & ptr->next;
3554 ptr = ptr->next;
3556 else
3558 *last = ptr->next;
3559 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3560 free_ldst_entry (ptr);
3561 ptr = * last;
3565 /* Show the world what we've found. */
3566 if (dump_file && pre_ldst_mems != NULL)
3567 print_ldst_list (dump_file);
3570 /* This routine will take an expression which we are replacing with
3571 a reaching register, and update any stores that are needed if
3572 that expression is in the ld_motion list. Stores are updated by
3573 copying their SRC to the reaching register, and then storing
3574 the reaching register into the store location. These keeps the
3575 correct value in the reaching register for the loads. */
3577 static void
3578 update_ld_motion_stores (struct expr * expr)
3580 struct ls_expr * mem_ptr;
3582 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3584 /* We can try to find just the REACHED stores, but is shouldn't
3585 matter to set the reaching reg everywhere... some might be
3586 dead and should be eliminated later. */
3588 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3589 where reg is the reaching reg used in the load. We checked in
3590 compute_ld_motion_mems that we can replace (set mem expr) with
3591 (set reg expr) in that insn. */
3592 rtx list = mem_ptr->stores;
3594 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3596 rtx insn = XEXP (list, 0);
3597 rtx pat = PATTERN (insn);
3598 rtx src = SET_SRC (pat);
3599 rtx reg = expr->reaching_reg;
3600 rtx copy;
3602 /* If we've already copied it, continue. */
3603 if (expr->reaching_reg == src)
3604 continue;
3606 if (dump_file)
3608 fprintf (dump_file, "PRE: store updated with reaching reg ");
3609 print_rtl (dump_file, reg);
3610 fprintf (dump_file, ":\n ");
3611 print_inline_rtx (dump_file, insn, 8);
3612 fprintf (dump_file, "\n");
3615 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3616 emit_insn_before (copy, insn);
3617 SET_SRC (pat) = reg;
3618 df_insn_rescan (insn);
3620 /* un-recognize this pattern since it's probably different now. */
3621 INSN_CODE (insn) = -1;
3622 gcse_create_count++;
3627 /* Return true if the graph is too expensive to optimize. PASS is the
3628 optimization about to be performed. */
3630 static bool
3631 is_too_expensive (const char *pass)
3633 /* Trying to perform global optimizations on flow graphs which have
3634 a high connectivity will take a long time and is unlikely to be
3635 particularly useful.
3637 In normal circumstances a cfg should have about twice as many
3638 edges as blocks. But we do not want to punish small functions
3639 which have a couple switch statements. Rather than simply
3640 threshold the number of blocks, uses something with a more
3641 graceful degradation. */
3642 if (n_edges > 20000 + n_basic_blocks * 4)
3644 warning (OPT_Wdisabled_optimization,
3645 "%s: %d basic blocks and %d edges/basic block",
3646 pass, n_basic_blocks, n_edges / n_basic_blocks);
3648 return true;
3651 /* If allocating memory for the dataflow bitmaps would take up too much
3652 storage it's better just to disable the optimization. */
3653 if ((n_basic_blocks
3654 * SBITMAP_SET_SIZE (max_reg_num ())
3655 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3657 warning (OPT_Wdisabled_optimization,
3658 "%s: %d basic blocks and %d registers",
3659 pass, n_basic_blocks, max_reg_num ());
3661 return true;
3664 return false;
3667 /* All the passes implemented in this file. Each pass has its
3668 own gate and execute function, and at the end of the file a
3669 pass definition for passes.c.
3671 We do not construct an accurate cfg in functions which call
3672 setjmp, so none of these passes runs if the function calls
3673 setjmp.
3674 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3676 static bool
3677 gate_rtl_pre (void)
3679 return optimize > 0 && flag_gcse
3680 && !cfun->calls_setjmp
3681 && optimize_function_for_speed_p (cfun)
3682 && dbg_cnt (pre);
3685 static unsigned int
3686 execute_rtl_pre (void)
3688 int changed;
3689 delete_unreachable_blocks ();
3690 df_analyze ();
3691 changed = one_pre_gcse_pass ();
3692 flag_rerun_cse_after_global_opts |= changed;
3693 if (changed)
3694 cleanup_cfg (0);
3695 return 0;
3698 static bool
3699 gate_rtl_hoist (void)
3701 return optimize > 0 && flag_gcse
3702 && !cfun->calls_setjmp
3703 /* It does not make sense to run code hoisting unless we are optimizing
3704 for code size -- it rarely makes programs faster, and can make then
3705 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3706 && optimize_function_for_size_p (cfun)
3707 && dbg_cnt (hoist);
3710 static unsigned int
3711 execute_rtl_hoist (void)
3713 int changed;
3714 delete_unreachable_blocks ();
3715 df_analyze ();
3716 changed = one_code_hoisting_pass ();
3717 flag_rerun_cse_after_global_opts |= changed;
3718 if (changed)
3719 cleanup_cfg (0);
3720 return 0;
3723 struct rtl_opt_pass pass_rtl_pre =
3726 RTL_PASS,
3727 "rtl pre", /* name */
3728 gate_rtl_pre, /* gate */
3729 execute_rtl_pre, /* execute */
3730 NULL, /* sub */
3731 NULL, /* next */
3732 0, /* static_pass_number */
3733 TV_PRE, /* tv_id */
3734 PROP_cfglayout, /* properties_required */
3735 0, /* properties_provided */
3736 0, /* properties_destroyed */
3737 0, /* todo_flags_start */
3738 TODO_df_finish | TODO_verify_rtl_sharing |
3739 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3743 struct rtl_opt_pass pass_rtl_hoist =
3746 RTL_PASS,
3747 "hoist", /* name */
3748 gate_rtl_hoist, /* gate */
3749 execute_rtl_hoist, /* execute */
3750 NULL, /* sub */
3751 NULL, /* next */
3752 0, /* static_pass_number */
3753 TV_HOIST, /* tv_id */
3754 PROP_cfglayout, /* properties_required */
3755 0, /* properties_provided */
3756 0, /* properties_destroyed */
3757 0, /* todo_flags_start */
3758 TODO_df_finish | TODO_verify_rtl_sharing |
3759 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3763 #include "gt-gcse.h"