Cleanup leftover libs.
[dragonfly.git] / contrib / gcc-4.7 / gcc / gcse.c
blob0fdc51a74f995f1cf9e15f845cbbcf59543c9bb1
1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
28 /* References searched while implementing this.
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
36 communications of the acm, Vol. 22, Num. 2, Feb. 1979
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "output.h"
154 #include "function.h"
155 #include "expr.h"
156 #include "except.h"
157 #include "ggc.h"
158 #include "params.h"
159 #include "cselib.h"
160 #include "intl.h"
161 #include "obstack.h"
162 #include "timevar.h"
163 #include "tree-pass.h"
164 #include "hashtab.h"
165 #include "df.h"
166 #include "dbgcnt.h"
167 #include "target.h"
168 #include "gcse.h"
170 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
171 are a superset of those done by classic GCSE.
173 Two passes of copy/constant propagation are done around PRE or hoisting
174 because the first one enables more GCSE and the second one helps to clean
175 up the copies that PRE and HOIST create. This is needed more for PRE than
176 for HOIST because code hoisting will try to use an existing register
177 containing the common subexpression rather than create a new one. This is
178 harder to do for PRE because of the code motion (which HOIST doesn't do).
180 Expressions we are interested in GCSE-ing are of the form
181 (set (pseudo-reg) (expression)).
182 Function want_to_gcse_p says what these are.
184 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
185 This allows PRE to hoist expressions that are expressed in multiple insns,
186 such as complex address calculations (e.g. for PIC code, or loads with a
187 high part and a low part).
189 PRE handles moving invariant expressions out of loops (by treating them as
190 partially redundant).
192 **********************
194 We used to support multiple passes but there are diminishing returns in
195 doing so. The first pass usually makes 90% of the changes that are doable.
196 A second pass can make a few more changes made possible by the first pass.
197 Experiments show any further passes don't make enough changes to justify
198 the expense.
200 A study of spec92 using an unlimited number of passes:
201 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
202 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
203 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
205 It was found doing copy propagation between each pass enables further
206 substitutions.
208 This study was done before expressions in REG_EQUAL notes were added as
209 candidate expressions for optimization, and before the GIMPLE optimizers
210 were added. Probably, multiple passes is even less efficient now than
211 at the time when the study was conducted.
213 PRE is quite expensive in complicated functions because the DFA can take
214 a while to converge. Hence we only perform one pass.
216 **********************
218 The steps for PRE are:
220 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
222 2) Perform the data flow analysis for PRE.
224 3) Delete the redundant instructions
226 4) Insert the required copies [if any] that make the partially
227 redundant instructions fully redundant.
229 5) For other reaching expressions, insert an instruction to copy the value
230 to a newly created pseudo that will reach the redundant instruction.
232 The deletion is done first so that when we do insertions we
233 know which pseudo reg to use.
235 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
236 argue it is not. The number of iterations for the algorithm to converge
237 is typically 2-4 so I don't view it as that expensive (relatively speaking).
239 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
240 we create. To make an expression reach the place where it's redundant,
241 the result of the expression is copied to a new register, and the redundant
242 expression is deleted by replacing it with this new register. Classic GCSE
243 doesn't have this problem as much as it computes the reaching defs of
244 each register in each block and thus can try to use an existing
245 register. */
247 /* GCSE global vars. */
249 struct target_gcse default_target_gcse;
250 #if SWITCHABLE_TARGET
251 struct target_gcse *this_target_gcse = &default_target_gcse;
252 #endif
254 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
255 int flag_rerun_cse_after_global_opts;
257 /* An obstack for our working variables. */
258 static struct obstack gcse_obstack;
260 struct reg_use {rtx reg_rtx; };
262 /* Hash table of expressions. */
264 struct expr
266 /* The expression. */
267 rtx expr;
268 /* Index in the available expression bitmaps. */
269 int bitmap_index;
270 /* Next entry with the same hash. */
271 struct expr *next_same_hash;
272 /* List of anticipatable occurrences in basic blocks in the function.
273 An "anticipatable occurrence" is one that is the first occurrence in the
274 basic block, the operands are not modified in the basic block prior
275 to the occurrence and the output is not used between the start of
276 the block and the occurrence. */
277 struct occr *antic_occr;
278 /* List of available occurrence in basic blocks in the function.
279 An "available occurrence" is one that is the last occurrence in the
280 basic block and the operands are not modified by following statements in
281 the basic block [including this insn]. */
282 struct occr *avail_occr;
283 /* Non-null if the computation is PRE redundant.
284 The value is the newly created pseudo-reg to record a copy of the
285 expression in all the places that reach the redundant copy. */
286 rtx reaching_reg;
287 /* Maximum distance in instructions this expression can travel.
288 We avoid moving simple expressions for more than a few instructions
289 to keep register pressure under control.
290 A value of "0" removes restrictions on how far the expression can
291 travel. */
292 int max_distance;
295 /* Occurrence of an expression.
296 There is one per basic block. If a pattern appears more than once the
297 last appearance is used [or first for anticipatable expressions]. */
299 struct occr
301 /* Next occurrence of this expression. */
302 struct occr *next;
303 /* The insn that computes the expression. */
304 rtx insn;
305 /* Nonzero if this [anticipatable] occurrence has been deleted. */
306 char deleted_p;
307 /* Nonzero if this [available] occurrence has been copied to
308 reaching_reg. */
309 /* ??? This is mutually exclusive with deleted_p, so they could share
310 the same byte. */
311 char copied_p;
314 typedef struct occr *occr_t;
315 DEF_VEC_P (occr_t);
316 DEF_VEC_ALLOC_P (occr_t, heap);
318 /* Expression hash tables.
319 Each hash table is an array of buckets.
320 ??? It is known that if it were an array of entries, structure elements
321 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
322 not clear whether in the final analysis a sufficient amount of memory would
323 be saved as the size of the available expression bitmaps would be larger
324 [one could build a mapping table without holes afterwards though].
325 Someday I'll perform the computation and figure it out. */
327 struct hash_table_d
329 /* The table itself.
330 This is an array of `expr_hash_table_size' elements. */
331 struct expr **table;
333 /* Size of the hash table, in elements. */
334 unsigned int size;
336 /* Number of hash table elements. */
337 unsigned int n_elems;
340 /* Expression hash table. */
341 static struct hash_table_d expr_hash_table;
343 /* This is a list of expressions which are MEMs and will be used by load
344 or store motion.
345 Load motion tracks MEMs which aren't killed by anything except itself,
346 i.e. loads and stores to a single location.
347 We can then allow movement of these MEM refs with a little special
348 allowance. (all stores copy the same value to the reaching reg used
349 for the loads). This means all values used to store into memory must have
350 no side effects so we can re-issue the setter value. */
352 struct ls_expr
354 struct expr * expr; /* Gcse expression reference for LM. */
355 rtx pattern; /* Pattern of this mem. */
356 rtx pattern_regs; /* List of registers mentioned by the mem. */
357 rtx loads; /* INSN list of loads seen. */
358 rtx stores; /* INSN list of stores seen. */
359 struct ls_expr * next; /* Next in the list. */
360 int invalid; /* Invalid for some reason. */
361 int index; /* If it maps to a bitmap index. */
362 unsigned int hash_index; /* Index when in a hash table. */
363 rtx reaching_reg; /* Register to use when re-writing. */
366 /* Head of the list of load/store memory refs. */
367 static struct ls_expr * pre_ldst_mems = NULL;
369 /* Hashtable for the load/store memory refs. */
370 static htab_t pre_ldst_table = NULL;
372 /* Bitmap containing one bit for each register in the program.
373 Used when performing GCSE to track which registers have been set since
374 the start of the basic block. */
375 static regset reg_set_bitmap;
377 /* Array, indexed by basic block number for a list of insns which modify
378 memory within that block. */
379 static VEC (rtx,heap) **modify_mem_list;
380 static bitmap modify_mem_list_set;
382 typedef struct modify_pair_s
384 rtx dest; /* A MEM. */
385 rtx dest_addr; /* The canonical address of `dest'. */
386 } modify_pair;
388 DEF_VEC_O(modify_pair);
389 DEF_VEC_ALLOC_O(modify_pair,heap);
391 /* This array parallels modify_mem_list, except that it stores MEMs
392 being set and their canonicalized memory addresses. */
393 static VEC (modify_pair,heap) **canon_modify_mem_list;
395 /* Bitmap indexed by block numbers to record which blocks contain
396 function calls. */
397 static bitmap blocks_with_calls;
399 /* Various variables for statistics gathering. */
401 /* Memory used in a pass.
402 This isn't intended to be absolutely precise. Its intent is only
403 to keep an eye on memory usage. */
404 static int bytes_used;
406 /* GCSE substitutions made. */
407 static int gcse_subst_count;
408 /* Number of copy instructions created. */
409 static int gcse_create_count;
411 /* Doing code hoisting. */
412 static bool doing_code_hoisting_p = false;
414 /* For available exprs */
415 static sbitmap *ae_kill;
417 static void compute_can_copy (void);
418 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
419 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
420 static void *gcse_alloc (unsigned long);
421 static void alloc_gcse_mem (void);
422 static void free_gcse_mem (void);
423 static void hash_scan_insn (rtx, struct hash_table_d *);
424 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
425 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
426 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
427 static int want_to_gcse_p (rtx, int *);
428 static int oprs_unchanged_p (const_rtx, const_rtx, int);
429 static int oprs_anticipatable_p (const_rtx, const_rtx);
430 static int oprs_available_p (const_rtx, const_rtx);
431 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
432 struct hash_table_d *);
433 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
434 static int expr_equiv_p (const_rtx, const_rtx);
435 static void record_last_reg_set_info (rtx, int);
436 static void record_last_mem_set_info (rtx);
437 static void record_last_set_info (rtx, const_rtx, void *);
438 static void compute_hash_table (struct hash_table_d *);
439 static void alloc_hash_table (struct hash_table_d *);
440 static void free_hash_table (struct hash_table_d *);
441 static void compute_hash_table_work (struct hash_table_d *);
442 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
443 static void compute_transp (const_rtx, int, sbitmap *);
444 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
445 struct hash_table_d *);
446 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
447 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
448 static void canon_list_insert (rtx, const_rtx, void *);
449 static void alloc_pre_mem (int, int);
450 static void free_pre_mem (void);
451 static struct edge_list *compute_pre_data (void);
452 static int pre_expr_reaches_here_p (basic_block, struct expr *,
453 basic_block);
454 static void insert_insn_end_basic_block (struct expr *, basic_block);
455 static void pre_insert_copy_insn (struct expr *, rtx);
456 static void pre_insert_copies (void);
457 static int pre_delete (void);
458 static int pre_gcse (struct edge_list *);
459 static int one_pre_gcse_pass (void);
460 static void add_label_notes (rtx, rtx);
461 static void alloc_code_hoist_mem (int, int);
462 static void free_code_hoist_mem (void);
463 static void compute_code_hoist_vbeinout (void);
464 static void compute_code_hoist_data (void);
465 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
466 int, int *);
467 static int hoist_code (void);
468 static int one_code_hoisting_pass (void);
469 static rtx process_insert_insn (struct expr *);
470 static int pre_edge_insert (struct edge_list *, struct expr **);
471 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
472 basic_block, char *);
473 static struct ls_expr * ldst_entry (rtx);
474 static void free_ldst_entry (struct ls_expr *);
475 static void free_ld_motion_mems (void);
476 static void print_ldst_list (FILE *);
477 static struct ls_expr * find_rtx_in_ldst (rtx);
478 static int simple_mem (const_rtx);
479 static void invalidate_any_buried_refs (rtx);
480 static void compute_ld_motion_mems (void);
481 static void trim_ld_motion_mems (void);
482 static void update_ld_motion_stores (struct expr *);
483 static void clear_modify_mem_tables (void);
484 static void free_modify_mem_tables (void);
485 static rtx gcse_emit_move_after (rtx, rtx, rtx);
486 static bool is_too_expensive (const char *);
488 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
489 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
491 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
492 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
494 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
495 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
497 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
498 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
500 /* Misc. utilities. */
502 #define can_copy \
503 (this_target_gcse->x_can_copy)
504 #define can_copy_init_p \
505 (this_target_gcse->x_can_copy_init_p)
507 /* Compute which modes support reg/reg copy operations. */
509 static void
510 compute_can_copy (void)
512 int i;
513 #ifndef AVOID_CCMODE_COPIES
514 rtx reg, insn;
515 #endif
516 memset (can_copy, 0, NUM_MACHINE_MODES);
518 start_sequence ();
519 for (i = 0; i < NUM_MACHINE_MODES; i++)
520 if (GET_MODE_CLASS (i) == MODE_CC)
522 #ifdef AVOID_CCMODE_COPIES
523 can_copy[i] = 0;
524 #else
525 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
526 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
527 if (recog (PATTERN (insn), insn, NULL) >= 0)
528 can_copy[i] = 1;
529 #endif
531 else
532 can_copy[i] = 1;
534 end_sequence ();
537 /* Returns whether the mode supports reg/reg copy operations. */
539 bool
540 can_copy_p (enum machine_mode mode)
542 if (! can_copy_init_p)
544 compute_can_copy ();
545 can_copy_init_p = true;
548 return can_copy[mode] != 0;
551 /* Cover function to xmalloc to record bytes allocated. */
553 static void *
554 gmalloc (size_t size)
556 bytes_used += size;
557 return xmalloc (size);
560 /* Cover function to xcalloc to record bytes allocated. */
562 static void *
563 gcalloc (size_t nelem, size_t elsize)
565 bytes_used += nelem * elsize;
566 return xcalloc (nelem, elsize);
569 /* Cover function to obstack_alloc. */
571 static void *
572 gcse_alloc (unsigned long size)
574 bytes_used += size;
575 return obstack_alloc (&gcse_obstack, size);
578 /* Allocate memory for the reg/memory set tracking tables.
579 This is called at the start of each pass. */
581 static void
582 alloc_gcse_mem (void)
584 /* Allocate vars to track sets of regs. */
585 reg_set_bitmap = ALLOC_REG_SET (NULL);
587 /* Allocate array to keep a list of insns which modify memory in each
588 basic block. */
589 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
590 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
591 last_basic_block);
592 modify_mem_list_set = BITMAP_ALLOC (NULL);
593 blocks_with_calls = BITMAP_ALLOC (NULL);
596 /* Free memory allocated by alloc_gcse_mem. */
598 static void
599 free_gcse_mem (void)
601 FREE_REG_SET (reg_set_bitmap);
603 free_modify_mem_tables ();
604 BITMAP_FREE (modify_mem_list_set);
605 BITMAP_FREE (blocks_with_calls);
608 /* Compute the local properties of each recorded expression.
610 Local properties are those that are defined by the block, irrespective of
611 other blocks.
613 An expression is transparent in a block if its operands are not modified
614 in the block.
616 An expression is computed (locally available) in a block if it is computed
617 at least once and expression would contain the same value if the
618 computation was moved to the end of the block.
620 An expression is locally anticipatable in a block if it is computed at
621 least once and expression would contain the same value if the computation
622 was moved to the beginning of the block.
624 We call this routine for pre and code hoisting. They all compute
625 basically the same information and thus can easily share this code.
627 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
628 properties. If NULL, then it is not necessary to compute or record that
629 particular property.
631 TABLE controls which hash table to look at. */
633 static void
634 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
635 struct hash_table_d *table)
637 unsigned int i;
639 /* Initialize any bitmaps that were passed in. */
640 if (transp)
642 sbitmap_vector_ones (transp, last_basic_block);
645 if (comp)
646 sbitmap_vector_zero (comp, last_basic_block);
647 if (antloc)
648 sbitmap_vector_zero (antloc, last_basic_block);
650 for (i = 0; i < table->size; i++)
652 struct expr *expr;
654 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
656 int indx = expr->bitmap_index;
657 struct occr *occr;
659 /* The expression is transparent in this block if it is not killed.
660 We start by assuming all are transparent [none are killed], and
661 then reset the bits for those that are. */
662 if (transp)
663 compute_transp (expr->expr, indx, transp);
665 /* The occurrences recorded in antic_occr are exactly those that
666 we want to set to nonzero in ANTLOC. */
667 if (antloc)
668 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
670 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
672 /* While we're scanning the table, this is a good place to
673 initialize this. */
674 occr->deleted_p = 0;
677 /* The occurrences recorded in avail_occr are exactly those that
678 we want to set to nonzero in COMP. */
679 if (comp)
680 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
682 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
684 /* While we're scanning the table, this is a good place to
685 initialize this. */
686 occr->copied_p = 0;
689 /* While we're scanning the table, this is a good place to
690 initialize this. */
691 expr->reaching_reg = 0;
696 /* Hash table support. */
698 struct reg_avail_info
700 basic_block last_bb;
701 int first_set;
702 int last_set;
705 static struct reg_avail_info *reg_avail_info;
706 static basic_block current_bb;
708 /* See whether X, the source of a set, is something we want to consider for
709 GCSE. */
711 static int
712 want_to_gcse_p (rtx x, int *max_distance_ptr)
714 #ifdef STACK_REGS
715 /* On register stack architectures, don't GCSE constants from the
716 constant pool, as the benefits are often swamped by the overhead
717 of shuffling the register stack between basic blocks. */
718 if (IS_STACK_MODE (GET_MODE (x)))
719 x = avoid_constant_pool_reference (x);
720 #endif
722 /* GCSE'ing constants:
724 We do not specifically distinguish between constant and non-constant
725 expressions in PRE and Hoist. We use set_src_cost below to limit
726 the maximum distance simple expressions can travel.
728 Nevertheless, constants are much easier to GCSE, and, hence,
729 it is easy to overdo the optimizations. Usually, excessive PRE and
730 Hoisting of constant leads to increased register pressure.
732 RA can deal with this by rematerialing some of the constants.
733 Therefore, it is important that the back-end generates sets of constants
734 in a way that allows reload rematerialize them under high register
735 pressure, i.e., a pseudo register with REG_EQUAL to constant
736 is set only once. Failing to do so will result in IRA/reload
737 spilling such constants under high register pressure instead of
738 rematerializing them. */
740 switch (GET_CODE (x))
742 case REG:
743 case SUBREG:
744 case CALL:
745 return 0;
747 case CONST_INT:
748 case CONST_DOUBLE:
749 case CONST_FIXED:
750 case CONST_VECTOR:
751 if (!doing_code_hoisting_p)
752 /* Do not PRE constants. */
753 return 0;
755 /* FALLTHRU */
757 default:
758 if (doing_code_hoisting_p)
759 /* PRE doesn't implement max_distance restriction. */
761 int cost;
762 int max_distance;
764 gcc_assert (!optimize_function_for_speed_p (cfun)
765 && optimize_function_for_size_p (cfun));
766 cost = set_src_cost (x, 0);
768 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
770 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
771 if (max_distance == 0)
772 return 0;
774 gcc_assert (max_distance > 0);
776 else
777 max_distance = 0;
779 if (max_distance_ptr)
780 *max_distance_ptr = max_distance;
783 return can_assign_to_reg_without_clobbers_p (x);
787 /* Used internally by can_assign_to_reg_without_clobbers_p. */
789 static GTY(()) rtx test_insn;
791 /* Return true if we can assign X to a pseudo register such that the
792 resulting insn does not result in clobbering a hard register as a
793 side-effect.
795 Additionally, if the target requires it, check that the resulting insn
796 can be copied. If it cannot, this means that X is special and probably
797 has hidden side-effects we don't want to mess with.
799 This function is typically used by code motion passes, to verify
800 that it is safe to insert an insn without worrying about clobbering
801 maybe live hard regs. */
803 bool
804 can_assign_to_reg_without_clobbers_p (rtx x)
806 int num_clobbers = 0;
807 int icode;
809 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
810 if (general_operand (x, GET_MODE (x)))
811 return 1;
812 else if (GET_MODE (x) == VOIDmode)
813 return 0;
815 /* Otherwise, check if we can make a valid insn from it. First initialize
816 our test insn if we haven't already. */
817 if (test_insn == 0)
819 test_insn
820 = make_insn_raw (gen_rtx_SET (VOIDmode,
821 gen_rtx_REG (word_mode,
822 FIRST_PSEUDO_REGISTER * 2),
823 const0_rtx));
824 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
827 /* Now make an insn like the one we would make when GCSE'ing and see if
828 valid. */
829 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
830 SET_SRC (PATTERN (test_insn)) = x;
832 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
833 if (icode < 0)
834 return false;
836 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
837 return false;
839 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
840 return false;
842 return true;
845 /* Return nonzero if the operands of expression X are unchanged from the
846 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
847 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
849 static int
850 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
852 int i, j;
853 enum rtx_code code;
854 const char *fmt;
856 if (x == 0)
857 return 1;
859 code = GET_CODE (x);
860 switch (code)
862 case REG:
864 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
866 if (info->last_bb != current_bb)
867 return 1;
868 if (avail_p)
869 return info->last_set < DF_INSN_LUID (insn);
870 else
871 return info->first_set >= DF_INSN_LUID (insn);
874 case MEM:
875 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
876 x, avail_p))
877 return 0;
878 else
879 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
881 case PRE_DEC:
882 case PRE_INC:
883 case POST_DEC:
884 case POST_INC:
885 case PRE_MODIFY:
886 case POST_MODIFY:
887 return 0;
889 case PC:
890 case CC0: /*FIXME*/
891 case CONST:
892 case CONST_INT:
893 case CONST_DOUBLE:
894 case CONST_FIXED:
895 case CONST_VECTOR:
896 case SYMBOL_REF:
897 case LABEL_REF:
898 case ADDR_VEC:
899 case ADDR_DIFF_VEC:
900 return 1;
902 default:
903 break;
906 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
908 if (fmt[i] == 'e')
910 /* If we are about to do the last recursive call needed at this
911 level, change it into iteration. This function is called enough
912 to be worth it. */
913 if (i == 0)
914 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
916 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
917 return 0;
919 else if (fmt[i] == 'E')
920 for (j = 0; j < XVECLEN (x, i); j++)
921 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
922 return 0;
925 return 1;
928 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
930 struct mem_conflict_info
932 /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
933 see if a memory store conflicts with this memory load. */
934 const_rtx mem;
936 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
937 references. */
938 bool conflict;
941 /* DEST is the output of an instruction. If it is a memory reference and
942 possibly conflicts with the load found in DATA, then communicate this
943 information back through DATA. */
945 static void
946 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
947 void *data)
949 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
951 while (GET_CODE (dest) == SUBREG
952 || GET_CODE (dest) == ZERO_EXTRACT
953 || GET_CODE (dest) == STRICT_LOW_PART)
954 dest = XEXP (dest, 0);
956 /* If DEST is not a MEM, then it will not conflict with the load. Note
957 that function calls are assumed to clobber memory, but are handled
958 elsewhere. */
959 if (! MEM_P (dest))
960 return;
962 /* If we are setting a MEM in our list of specially recognized MEMs,
963 don't mark as killed this time. */
964 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
966 if (!find_rtx_in_ldst (dest))
967 mci->conflict = true;
968 return;
971 if (true_dependence (dest, GET_MODE (dest), mci->mem))
972 mci->conflict = true;
975 /* Return nonzero if the expression in X (a memory reference) is killed
976 in block BB before or after the insn with the LUID in UID_LIMIT.
977 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
978 before UID_LIMIT.
980 To check the entire block, set UID_LIMIT to max_uid + 1 and
981 AVAIL_P to 0. */
983 static int
984 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
985 int avail_p)
987 VEC (rtx,heap) *list = modify_mem_list[bb->index];
988 rtx setter;
989 unsigned ix;
991 /* If this is a readonly then we aren't going to be changing it. */
992 if (MEM_READONLY_P (x))
993 return 0;
995 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
997 struct mem_conflict_info mci;
999 /* Ignore entries in the list that do not apply. */
1000 if ((avail_p
1001 && DF_INSN_LUID (setter) < uid_limit)
1002 || (! avail_p
1003 && DF_INSN_LUID (setter) > uid_limit))
1004 continue;
1006 /* If SETTER is a call everything is clobbered. Note that calls
1007 to pure functions are never put on the list, so we need not
1008 worry about them. */
1009 if (CALL_P (setter))
1010 return 1;
1012 /* SETTER must be an INSN of some kind that sets memory. Call
1013 note_stores to examine each hunk of memory that is modified. */
1014 mci.mem = x;
1015 mci.conflict = false;
1016 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1017 if (mci.conflict)
1018 return 1;
1020 return 0;
1023 /* Return nonzero if the operands of expression X are unchanged from
1024 the start of INSN's basic block up to but not including INSN. */
1026 static int
1027 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1029 return oprs_unchanged_p (x, insn, 0);
1032 /* Return nonzero if the operands of expression X are unchanged from
1033 INSN to the end of INSN's basic block. */
1035 static int
1036 oprs_available_p (const_rtx x, const_rtx insn)
1038 return oprs_unchanged_p (x, insn, 1);
1041 /* Hash expression X.
1043 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1044 indicating if a volatile operand is found or if the expression contains
1045 something we don't want to insert in the table. HASH_TABLE_SIZE is
1046 the current size of the hash table to be probed. */
1048 static unsigned int
1049 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1050 int hash_table_size)
1052 unsigned int hash;
1054 *do_not_record_p = 0;
1056 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1057 return hash % hash_table_size;
1060 /* Return nonzero if exp1 is equivalent to exp2. */
1062 static int
1063 expr_equiv_p (const_rtx x, const_rtx y)
1065 return exp_equiv_p (x, y, 0, true);
1068 /* Insert expression X in INSN in the hash TABLE.
1069 If it is already present, record it as the last occurrence in INSN's
1070 basic block.
1072 MODE is the mode of the value X is being stored into.
1073 It is only used if X is a CONST_INT.
1075 ANTIC_P is nonzero if X is an anticipatable expression.
1076 AVAIL_P is nonzero if X is an available expression.
1078 MAX_DISTANCE is the maximum distance in instructions this expression can
1079 be moved. */
1081 static void
1082 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1083 int avail_p, int max_distance, struct hash_table_d *table)
1085 int found, do_not_record_p;
1086 unsigned int hash;
1087 struct expr *cur_expr, *last_expr = NULL;
1088 struct occr *antic_occr, *avail_occr;
1090 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1092 /* Do not insert expression in table if it contains volatile operands,
1093 or if hash_expr determines the expression is something we don't want
1094 to or can't handle. */
1095 if (do_not_record_p)
1096 return;
1098 cur_expr = table->table[hash];
1099 found = 0;
1101 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1103 /* If the expression isn't found, save a pointer to the end of
1104 the list. */
1105 last_expr = cur_expr;
1106 cur_expr = cur_expr->next_same_hash;
1109 if (! found)
1111 cur_expr = GOBNEW (struct expr);
1112 bytes_used += sizeof (struct expr);
1113 if (table->table[hash] == NULL)
1114 /* This is the first pattern that hashed to this index. */
1115 table->table[hash] = cur_expr;
1116 else
1117 /* Add EXPR to end of this hash chain. */
1118 last_expr->next_same_hash = cur_expr;
1120 /* Set the fields of the expr element. */
1121 cur_expr->expr = x;
1122 cur_expr->bitmap_index = table->n_elems++;
1123 cur_expr->next_same_hash = NULL;
1124 cur_expr->antic_occr = NULL;
1125 cur_expr->avail_occr = NULL;
1126 gcc_assert (max_distance >= 0);
1127 cur_expr->max_distance = max_distance;
1129 else
1130 gcc_assert (cur_expr->max_distance == max_distance);
1132 /* Now record the occurrence(s). */
1133 if (antic_p)
1135 antic_occr = cur_expr->antic_occr;
1137 if (antic_occr
1138 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1139 antic_occr = NULL;
1141 if (antic_occr)
1142 /* Found another instance of the expression in the same basic block.
1143 Prefer the currently recorded one. We want the first one in the
1144 block and the block is scanned from start to end. */
1145 ; /* nothing to do */
1146 else
1148 /* First occurrence of this expression in this basic block. */
1149 antic_occr = GOBNEW (struct occr);
1150 bytes_used += sizeof (struct occr);
1151 antic_occr->insn = insn;
1152 antic_occr->next = cur_expr->antic_occr;
1153 antic_occr->deleted_p = 0;
1154 cur_expr->antic_occr = antic_occr;
1158 if (avail_p)
1160 avail_occr = cur_expr->avail_occr;
1162 if (avail_occr
1163 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1165 /* Found another instance of the expression in the same basic block.
1166 Prefer this occurrence to the currently recorded one. We want
1167 the last one in the block and the block is scanned from start
1168 to end. */
1169 avail_occr->insn = insn;
1171 else
1173 /* First occurrence of this expression in this basic block. */
1174 avail_occr = GOBNEW (struct occr);
1175 bytes_used += sizeof (struct occr);
1176 avail_occr->insn = insn;
1177 avail_occr->next = cur_expr->avail_occr;
1178 avail_occr->deleted_p = 0;
1179 cur_expr->avail_occr = avail_occr;
1184 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1186 static void
1187 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1189 rtx src = SET_SRC (set);
1190 rtx dest = SET_DEST (set);
1191 rtx note;
1193 if (GET_CODE (src) == CALL)
1194 hash_scan_call (src, insn, table);
1196 else if (REG_P (dest))
1198 unsigned int regno = REGNO (dest);
1199 int max_distance = 0;
1201 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1203 This allows us to do a single GCSE pass and still eliminate
1204 redundant constants, addresses or other expressions that are
1205 constructed with multiple instructions.
1207 However, keep the original SRC if INSN is a simple reg-reg move.
1208 In this case, there will almost always be a REG_EQUAL note on the
1209 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1210 for INSN, we miss copy propagation opportunities and we perform the
1211 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1212 do more than one PRE GCSE pass.
1214 Note that this does not impede profitable constant propagations. We
1215 "look through" reg-reg sets in lookup_avail_set. */
1216 note = find_reg_equal_equiv_note (insn);
1217 if (note != 0
1218 && REG_NOTE_KIND (note) == REG_EQUAL
1219 && !REG_P (src)
1220 && want_to_gcse_p (XEXP (note, 0), NULL))
1221 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1223 /* Only record sets of pseudo-regs in the hash table. */
1224 if (regno >= FIRST_PSEUDO_REGISTER
1225 /* Don't GCSE something if we can't do a reg/reg copy. */
1226 && can_copy_p (GET_MODE (dest))
1227 /* GCSE commonly inserts instruction after the insn. We can't
1228 do that easily for EH edges so disable GCSE on these for now. */
1229 /* ??? We can now easily create new EH landing pads at the
1230 gimple level, for splitting edges; there's no reason we
1231 can't do the same thing at the rtl level. */
1232 && !can_throw_internal (insn)
1233 /* Is SET_SRC something we want to gcse? */
1234 && want_to_gcse_p (src, &max_distance)
1235 /* Don't CSE a nop. */
1236 && ! set_noop_p (set)
1237 /* Don't GCSE if it has attached REG_EQUIV note.
1238 At this point this only function parameters should have
1239 REG_EQUIV notes and if the argument slot is used somewhere
1240 explicitly, it means address of parameter has been taken,
1241 so we should not extend the lifetime of the pseudo. */
1242 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1244 /* An expression is not anticipatable if its operands are
1245 modified before this insn or if this is not the only SET in
1246 this insn. The latter condition does not have to mean that
1247 SRC itself is not anticipatable, but we just will not be
1248 able to handle code motion of insns with multiple sets. */
1249 int antic_p = oprs_anticipatable_p (src, insn)
1250 && !multiple_sets (insn);
1251 /* An expression is not available if its operands are
1252 subsequently modified, including this insn. It's also not
1253 available if this is a branch, because we can't insert
1254 a set after the branch. */
1255 int avail_p = (oprs_available_p (src, insn)
1256 && ! JUMP_P (insn));
1258 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1259 max_distance, table);
1262 /* In case of store we want to consider the memory value as available in
1263 the REG stored in that memory. This makes it possible to remove
1264 redundant loads from due to stores to the same location. */
1265 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1267 unsigned int regno = REGNO (src);
1268 int max_distance = 0;
1270 /* Only record sets of pseudo-regs in the hash table. */
1271 if (regno >= FIRST_PSEUDO_REGISTER
1272 /* Don't GCSE something if we can't do a reg/reg copy. */
1273 && can_copy_p (GET_MODE (src))
1274 /* GCSE commonly inserts instruction after the insn. We can't
1275 do that easily for EH edges so disable GCSE on these for now. */
1276 && !can_throw_internal (insn)
1277 /* Is SET_DEST something we want to gcse? */
1278 && want_to_gcse_p (dest, &max_distance)
1279 /* Don't CSE a nop. */
1280 && ! set_noop_p (set)
1281 /* Don't GCSE if it has attached REG_EQUIV note.
1282 At this point this only function parameters should have
1283 REG_EQUIV notes and if the argument slot is used somewhere
1284 explicitly, it means address of parameter has been taken,
1285 so we should not extend the lifetime of the pseudo. */
1286 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1287 || ! MEM_P (XEXP (note, 0))))
1289 /* Stores are never anticipatable. */
1290 int antic_p = 0;
1291 /* An expression is not available if its operands are
1292 subsequently modified, including this insn. It's also not
1293 available if this is a branch, because we can't insert
1294 a set after the branch. */
1295 int avail_p = oprs_available_p (dest, insn)
1296 && ! JUMP_P (insn);
1298 /* Record the memory expression (DEST) in the hash table. */
1299 insert_expr_in_table (dest, GET_MODE (dest), insn,
1300 antic_p, avail_p, max_distance, table);
1305 static void
1306 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1307 struct hash_table_d *table ATTRIBUTE_UNUSED)
1309 /* Currently nothing to do. */
1312 static void
1313 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1314 struct hash_table_d *table ATTRIBUTE_UNUSED)
1316 /* Currently nothing to do. */
1319 /* Process INSN and add hash table entries as appropriate. */
1321 static void
1322 hash_scan_insn (rtx insn, struct hash_table_d *table)
1324 rtx pat = PATTERN (insn);
1325 int i;
1327 /* Pick out the sets of INSN and for other forms of instructions record
1328 what's been modified. */
1330 if (GET_CODE (pat) == SET)
1331 hash_scan_set (pat, insn, table);
1333 else if (GET_CODE (pat) == CLOBBER)
1334 hash_scan_clobber (pat, insn, table);
1336 else if (GET_CODE (pat) == CALL)
1337 hash_scan_call (pat, insn, table);
1339 else if (GET_CODE (pat) == PARALLEL)
1340 for (i = 0; i < XVECLEN (pat, 0); i++)
1342 rtx x = XVECEXP (pat, 0, i);
1344 if (GET_CODE (x) == SET)
1345 hash_scan_set (x, insn, table);
1346 else if (GET_CODE (x) == CLOBBER)
1347 hash_scan_clobber (x, insn, table);
1348 else if (GET_CODE (x) == CALL)
1349 hash_scan_call (x, insn, table);
1353 /* Dump the hash table TABLE to file FILE under the name NAME. */
1355 static void
1356 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1358 int i;
1359 /* Flattened out table, so it's printed in proper order. */
1360 struct expr **flat_table;
1361 unsigned int *hash_val;
1362 struct expr *expr;
1364 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1365 hash_val = XNEWVEC (unsigned int, table->n_elems);
1367 for (i = 0; i < (int) table->size; i++)
1368 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1370 flat_table[expr->bitmap_index] = expr;
1371 hash_val[expr->bitmap_index] = i;
1374 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1375 name, table->size, table->n_elems);
1377 for (i = 0; i < (int) table->n_elems; i++)
1378 if (flat_table[i] != 0)
1380 expr = flat_table[i];
1381 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1382 expr->bitmap_index, hash_val[i], expr->max_distance);
1383 print_rtl (file, expr->expr);
1384 fprintf (file, "\n");
1387 fprintf (file, "\n");
1389 free (flat_table);
1390 free (hash_val);
1393 /* Record register first/last/block set information for REGNO in INSN.
1395 first_set records the first place in the block where the register
1396 is set and is used to compute "anticipatability".
1398 last_set records the last place in the block where the register
1399 is set and is used to compute "availability".
1401 last_bb records the block for which first_set and last_set are
1402 valid, as a quick test to invalidate them. */
1404 static void
1405 record_last_reg_set_info (rtx insn, int regno)
1407 struct reg_avail_info *info = &reg_avail_info[regno];
1408 int luid = DF_INSN_LUID (insn);
1410 info->last_set = luid;
1411 if (info->last_bb != current_bb)
1413 info->last_bb = current_bb;
1414 info->first_set = luid;
1418 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1419 Note we store a pair of elements in the list, so they have to be
1420 taken off pairwise. */
1422 static void
1423 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1424 void * v_insn)
1426 rtx dest_addr, insn;
1427 int bb;
1428 modify_pair *pair;
1430 while (GET_CODE (dest) == SUBREG
1431 || GET_CODE (dest) == ZERO_EXTRACT
1432 || GET_CODE (dest) == STRICT_LOW_PART)
1433 dest = XEXP (dest, 0);
1435 /* If DEST is not a MEM, then it will not conflict with a load. Note
1436 that function calls are assumed to clobber memory, but are handled
1437 elsewhere. */
1439 if (! MEM_P (dest))
1440 return;
1442 dest_addr = get_addr (XEXP (dest, 0));
1443 dest_addr = canon_rtx (dest_addr);
1444 insn = (rtx) v_insn;
1445 bb = BLOCK_FOR_INSN (insn)->index;
1447 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1448 pair->dest = dest;
1449 pair->dest_addr = dest_addr;
1452 /* Record memory modification information for INSN. We do not actually care
1453 about the memory location(s) that are set, or even how they are set (consider
1454 a CALL_INSN). We merely need to record which insns modify memory. */
1456 static void
1457 record_last_mem_set_info (rtx insn)
1459 int bb = BLOCK_FOR_INSN (insn)->index;
1461 /* load_killed_in_block_p will handle the case of calls clobbering
1462 everything. */
1463 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1464 bitmap_set_bit (modify_mem_list_set, bb);
1466 if (CALL_P (insn))
1467 bitmap_set_bit (blocks_with_calls, bb);
1468 else
1469 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1472 /* Called from compute_hash_table via note_stores to handle one
1473 SET or CLOBBER in an insn. DATA is really the instruction in which
1474 the SET is taking place. */
1476 static void
1477 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1479 rtx last_set_insn = (rtx) data;
1481 if (GET_CODE (dest) == SUBREG)
1482 dest = SUBREG_REG (dest);
1484 if (REG_P (dest))
1485 record_last_reg_set_info (last_set_insn, REGNO (dest));
1486 else if (MEM_P (dest)
1487 /* Ignore pushes, they clobber nothing. */
1488 && ! push_operand (dest, GET_MODE (dest)))
1489 record_last_mem_set_info (last_set_insn);
1492 /* Top level function to create an expression hash table.
1494 Expression entries are placed in the hash table if
1495 - they are of the form (set (pseudo-reg) src),
1496 - src is something we want to perform GCSE on,
1497 - none of the operands are subsequently modified in the block
1499 Currently src must be a pseudo-reg or a const_int.
1501 TABLE is the table computed. */
1503 static void
1504 compute_hash_table_work (struct hash_table_d *table)
1506 int i;
1508 /* re-Cache any INSN_LIST nodes we have allocated. */
1509 clear_modify_mem_tables ();
1510 /* Some working arrays used to track first and last set in each block. */
1511 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1513 for (i = 0; i < max_reg_num (); ++i)
1514 reg_avail_info[i].last_bb = NULL;
1516 FOR_EACH_BB (current_bb)
1518 rtx insn;
1519 unsigned int regno;
1521 /* First pass over the instructions records information used to
1522 determine when registers and memory are first and last set. */
1523 FOR_BB_INSNS (current_bb, insn)
1525 if (!NONDEBUG_INSN_P (insn))
1526 continue;
1528 if (CALL_P (insn))
1530 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1531 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1532 record_last_reg_set_info (insn, regno);
1534 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1535 record_last_mem_set_info (insn);
1538 note_stores (PATTERN (insn), record_last_set_info, insn);
1541 /* The next pass builds the hash table. */
1542 FOR_BB_INSNS (current_bb, insn)
1543 if (NONDEBUG_INSN_P (insn))
1544 hash_scan_insn (insn, table);
1547 free (reg_avail_info);
1548 reg_avail_info = NULL;
1551 /* Allocate space for the set/expr hash TABLE.
1552 It is used to determine the number of buckets to use. */
1554 static void
1555 alloc_hash_table (struct hash_table_d *table)
1557 int n;
1559 n = get_max_insn_count ();
1561 table->size = n / 4;
1562 if (table->size < 11)
1563 table->size = 11;
1565 /* Attempt to maintain efficient use of hash table.
1566 Making it an odd number is simplest for now.
1567 ??? Later take some measurements. */
1568 table->size |= 1;
1569 n = table->size * sizeof (struct expr *);
1570 table->table = GNEWVAR (struct expr *, n);
1573 /* Free things allocated by alloc_hash_table. */
1575 static void
1576 free_hash_table (struct hash_table_d *table)
1578 free (table->table);
1581 /* Compute the expression hash table TABLE. */
1583 static void
1584 compute_hash_table (struct hash_table_d *table)
1586 /* Initialize count of number of entries in hash table. */
1587 table->n_elems = 0;
1588 memset (table->table, 0, table->size * sizeof (struct expr *));
1590 compute_hash_table_work (table);
1593 /* Expression tracking support. */
1595 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1596 static void
1597 clear_modify_mem_tables (void)
1599 unsigned i;
1600 bitmap_iterator bi;
1602 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1604 VEC_free (rtx, heap, modify_mem_list[i]);
1605 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1607 bitmap_clear (modify_mem_list_set);
1608 bitmap_clear (blocks_with_calls);
1611 /* Release memory used by modify_mem_list_set. */
1613 static void
1614 free_modify_mem_tables (void)
1616 clear_modify_mem_tables ();
1617 free (modify_mem_list);
1618 free (canon_modify_mem_list);
1619 modify_mem_list = 0;
1620 canon_modify_mem_list = 0;
1623 /* For each block, compute whether X is transparent. X is either an
1624 expression or an assignment [though we don't care which, for this context
1625 an assignment is treated as an expression]. For each block where an
1626 element of X is modified, reset the INDX bit in BMAP. */
1628 static void
1629 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1631 int i, j;
1632 enum rtx_code code;
1633 const char *fmt;
1635 /* repeat is used to turn tail-recursion into iteration since GCC
1636 can't do it when there's no return value. */
1637 repeat:
1639 if (x == 0)
1640 return;
1642 code = GET_CODE (x);
1643 switch (code)
1645 case REG:
1647 df_ref def;
1648 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1649 def;
1650 def = DF_REF_NEXT_REG (def))
1651 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1654 return;
1656 case MEM:
1657 if (! MEM_READONLY_P (x))
1659 bitmap_iterator bi;
1660 unsigned bb_index;
1661 rtx x_addr;
1663 x_addr = get_addr (XEXP (x, 0));
1664 x_addr = canon_rtx (x_addr);
1666 /* First handle all the blocks with calls. We don't need to
1667 do any list walking for them. */
1668 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1670 RESET_BIT (bmap[bb_index], indx);
1673 /* Now iterate over the blocks which have memory modifications
1674 but which do not have any calls. */
1675 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1676 blocks_with_calls,
1677 0, bb_index, bi)
1679 VEC (modify_pair,heap) *list
1680 = canon_modify_mem_list[bb_index];
1681 modify_pair *pair;
1682 unsigned ix;
1684 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1686 rtx dest = pair->dest;
1687 rtx dest_addr = pair->dest_addr;
1689 if (canon_true_dependence (dest, GET_MODE (dest),
1690 dest_addr, x, x_addr))
1691 RESET_BIT (bmap[bb_index], indx);
1696 x = XEXP (x, 0);
1697 goto repeat;
1699 case PC:
1700 case CC0: /*FIXME*/
1701 case CONST:
1702 case CONST_INT:
1703 case CONST_DOUBLE:
1704 case CONST_FIXED:
1705 case CONST_VECTOR:
1706 case SYMBOL_REF:
1707 case LABEL_REF:
1708 case ADDR_VEC:
1709 case ADDR_DIFF_VEC:
1710 return;
1712 default:
1713 break;
1716 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1718 if (fmt[i] == 'e')
1720 /* If we are about to do the last recursive call
1721 needed at this level, change it into iteration.
1722 This function is called enough to be worth it. */
1723 if (i == 0)
1725 x = XEXP (x, i);
1726 goto repeat;
1729 compute_transp (XEXP (x, i), indx, bmap);
1731 else if (fmt[i] == 'E')
1732 for (j = 0; j < XVECLEN (x, i); j++)
1733 compute_transp (XVECEXP (x, i, j), indx, bmap);
1737 /* Compute PRE+LCM working variables. */
1739 /* Local properties of expressions. */
1741 /* Nonzero for expressions that are transparent in the block. */
1742 static sbitmap *transp;
1744 /* Nonzero for expressions that are computed (available) in the block. */
1745 static sbitmap *comp;
1747 /* Nonzero for expressions that are locally anticipatable in the block. */
1748 static sbitmap *antloc;
1750 /* Nonzero for expressions where this block is an optimal computation
1751 point. */
1752 static sbitmap *pre_optimal;
1754 /* Nonzero for expressions which are redundant in a particular block. */
1755 static sbitmap *pre_redundant;
1757 /* Nonzero for expressions which should be inserted on a specific edge. */
1758 static sbitmap *pre_insert_map;
1760 /* Nonzero for expressions which should be deleted in a specific block. */
1761 static sbitmap *pre_delete_map;
1763 /* Allocate vars used for PRE analysis. */
1765 static void
1766 alloc_pre_mem (int n_blocks, int n_exprs)
1768 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1769 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1770 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1772 pre_optimal = NULL;
1773 pre_redundant = NULL;
1774 pre_insert_map = NULL;
1775 pre_delete_map = NULL;
1776 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1778 /* pre_insert and pre_delete are allocated later. */
1781 /* Free vars used for PRE analysis. */
1783 static void
1784 free_pre_mem (void)
1786 sbitmap_vector_free (transp);
1787 sbitmap_vector_free (comp);
1789 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1791 if (pre_optimal)
1792 sbitmap_vector_free (pre_optimal);
1793 if (pre_redundant)
1794 sbitmap_vector_free (pre_redundant);
1795 if (pre_insert_map)
1796 sbitmap_vector_free (pre_insert_map);
1797 if (pre_delete_map)
1798 sbitmap_vector_free (pre_delete_map);
1800 transp = comp = NULL;
1801 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1804 /* Remove certain expressions from anticipatable and transparent
1805 sets of basic blocks that have incoming abnormal edge.
1806 For PRE remove potentially trapping expressions to avoid placing
1807 them on abnormal edges. For hoisting remove memory references that
1808 can be clobbered by calls. */
1810 static void
1811 prune_expressions (bool pre_p)
1813 sbitmap prune_exprs;
1814 struct expr *expr;
1815 unsigned int ui;
1816 basic_block bb;
1818 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1819 sbitmap_zero (prune_exprs);
1820 for (ui = 0; ui < expr_hash_table.size; ui++)
1822 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1824 /* Note potentially trapping expressions. */
1825 if (may_trap_p (expr->expr))
1827 SET_BIT (prune_exprs, expr->bitmap_index);
1828 continue;
1831 if (!pre_p && MEM_P (expr->expr))
1832 /* Note memory references that can be clobbered by a call.
1833 We do not split abnormal edges in hoisting, so would
1834 a memory reference get hoisted along an abnormal edge,
1835 it would be placed /before/ the call. Therefore, only
1836 constant memory references can be hoisted along abnormal
1837 edges. */
1839 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1840 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1841 continue;
1843 if (MEM_READONLY_P (expr->expr)
1844 && !MEM_VOLATILE_P (expr->expr)
1845 && MEM_NOTRAP_P (expr->expr))
1846 /* Constant memory reference, e.g., a PIC address. */
1847 continue;
1849 /* ??? Optimally, we would use interprocedural alias
1850 analysis to determine if this mem is actually killed
1851 by this call. */
1853 SET_BIT (prune_exprs, expr->bitmap_index);
1858 FOR_EACH_BB (bb)
1860 edge e;
1861 edge_iterator ei;
1863 /* If the current block is the destination of an abnormal edge, we
1864 kill all trapping (for PRE) and memory (for hoist) expressions
1865 because we won't be able to properly place the instruction on
1866 the edge. So make them neither anticipatable nor transparent.
1867 This is fairly conservative.
1869 ??? For hoisting it may be necessary to check for set-and-jump
1870 instructions here, not just for abnormal edges. The general problem
1871 is that when an expression cannot not be placed right at the end of
1872 a basic block we should account for any side-effects of a subsequent
1873 jump instructions that could clobber the expression. It would
1874 be best to implement this check along the lines of
1875 hoist_expr_reaches_here_p where the target block is already known
1876 and, hence, there's no need to conservatively prune expressions on
1877 "intermediate" set-and-jump instructions. */
1878 FOR_EACH_EDGE (e, ei, bb->preds)
1879 if ((e->flags & EDGE_ABNORMAL)
1880 && (pre_p || CALL_P (BB_END (e->src))))
1882 sbitmap_difference (antloc[bb->index],
1883 antloc[bb->index], prune_exprs);
1884 sbitmap_difference (transp[bb->index],
1885 transp[bb->index], prune_exprs);
1886 break;
1890 sbitmap_free (prune_exprs);
1893 /* It may be necessary to insert a large number of insns on edges to
1894 make the existing occurrences of expressions fully redundant. This
1895 routine examines the set of insertions and deletions and if the ratio
1896 of insertions to deletions is too high for a particular expression, then
1897 the expression is removed from the insertion/deletion sets.
1899 N_ELEMS is the number of elements in the hash table. */
1901 static void
1902 prune_insertions_deletions (int n_elems)
1904 sbitmap_iterator sbi;
1905 sbitmap prune_exprs;
1907 /* We always use I to iterate over blocks/edges and J to iterate over
1908 expressions. */
1909 unsigned int i, j;
1911 /* Counts for the number of times an expression needs to be inserted and
1912 number of times an expression can be removed as a result. */
1913 int *insertions = GCNEWVEC (int, n_elems);
1914 int *deletions = GCNEWVEC (int, n_elems);
1916 /* Set of expressions which require too many insertions relative to
1917 the number of deletions achieved. We will prune these out of the
1918 insertion/deletion sets. */
1919 prune_exprs = sbitmap_alloc (n_elems);
1920 sbitmap_zero (prune_exprs);
1922 /* Iterate over the edges counting the number of times each expression
1923 needs to be inserted. */
1924 for (i = 0; i < (unsigned) n_edges; i++)
1926 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1927 insertions[j]++;
1930 /* Similarly for deletions, but those occur in blocks rather than on
1931 edges. */
1932 for (i = 0; i < (unsigned) last_basic_block; i++)
1934 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1935 deletions[j]++;
1938 /* Now that we have accurate counts, iterate over the elements in the
1939 hash table and see if any need too many insertions relative to the
1940 number of evaluations that can be removed. If so, mark them in
1941 PRUNE_EXPRS. */
1942 for (j = 0; j < (unsigned) n_elems; j++)
1943 if (deletions[j]
1944 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1945 SET_BIT (prune_exprs, j);
1947 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1948 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1950 for (i = 0; i < (unsigned) n_edges; i++)
1951 RESET_BIT (pre_insert_map[i], j);
1953 for (i = 0; i < (unsigned) last_basic_block; i++)
1954 RESET_BIT (pre_delete_map[i], j);
1957 sbitmap_free (prune_exprs);
1958 free (insertions);
1959 free (deletions);
1962 /* Top level routine to do the dataflow analysis needed by PRE. */
1964 static struct edge_list *
1965 compute_pre_data (void)
1967 struct edge_list *edge_list;
1968 basic_block bb;
1970 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1971 prune_expressions (true);
1972 sbitmap_vector_zero (ae_kill, last_basic_block);
1974 /* Compute ae_kill for each basic block using:
1976 ~(TRANSP | COMP)
1979 FOR_EACH_BB (bb)
1981 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1982 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1985 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1986 ae_kill, &pre_insert_map, &pre_delete_map);
1987 sbitmap_vector_free (antloc);
1988 antloc = NULL;
1989 sbitmap_vector_free (ae_kill);
1990 ae_kill = NULL;
1992 prune_insertions_deletions (expr_hash_table.n_elems);
1994 return edge_list;
1997 /* PRE utilities */
1999 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2000 block BB.
2002 VISITED is a pointer to a working buffer for tracking which BB's have
2003 been visited. It is NULL for the top-level call.
2005 We treat reaching expressions that go through blocks containing the same
2006 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2007 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2008 2 as not reaching. The intent is to improve the probability of finding
2009 only one reaching expression and to reduce register lifetimes by picking
2010 the closest such expression. */
2012 static int
2013 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2014 basic_block bb, char *visited)
2016 edge pred;
2017 edge_iterator ei;
2019 FOR_EACH_EDGE (pred, ei, bb->preds)
2021 basic_block pred_bb = pred->src;
2023 if (pred->src == ENTRY_BLOCK_PTR
2024 /* Has predecessor has already been visited? */
2025 || visited[pred_bb->index])
2026 ;/* Nothing to do. */
2028 /* Does this predecessor generate this expression? */
2029 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2031 /* Is this the occurrence we're looking for?
2032 Note that there's only one generating occurrence per block
2033 so we just need to check the block number. */
2034 if (occr_bb == pred_bb)
2035 return 1;
2037 visited[pred_bb->index] = 1;
2039 /* Ignore this predecessor if it kills the expression. */
2040 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2041 visited[pred_bb->index] = 1;
2043 /* Neither gen nor kill. */
2044 else
2046 visited[pred_bb->index] = 1;
2047 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2048 return 1;
2052 /* All paths have been checked. */
2053 return 0;
2056 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2057 memory allocated for that function is returned. */
2059 static int
2060 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2062 int rval;
2063 char *visited = XCNEWVEC (char, last_basic_block);
2065 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2067 free (visited);
2068 return rval;
2071 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2073 static rtx
2074 process_insert_insn (struct expr *expr)
2076 rtx reg = expr->reaching_reg;
2077 /* Copy the expression to make sure we don't have any sharing issues. */
2078 rtx exp = copy_rtx (expr->expr);
2079 rtx pat;
2081 start_sequence ();
2083 /* If the expression is something that's an operand, like a constant,
2084 just copy it to a register. */
2085 if (general_operand (exp, GET_MODE (reg)))
2086 emit_move_insn (reg, exp);
2088 /* Otherwise, make a new insn to compute this expression and make sure the
2089 insn will be recognized (this also adds any needed CLOBBERs). */
2090 else
2092 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2094 if (insn_invalid_p (insn))
2095 gcc_unreachable ();
2098 pat = get_insns ();
2099 end_sequence ();
2101 return pat;
2104 /* Add EXPR to the end of basic block BB.
2106 This is used by both the PRE and code hoisting. */
2108 static void
2109 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2111 rtx insn = BB_END (bb);
2112 rtx new_insn;
2113 rtx reg = expr->reaching_reg;
2114 int regno = REGNO (reg);
2115 rtx pat, pat_end;
2117 pat = process_insert_insn (expr);
2118 gcc_assert (pat && INSN_P (pat));
2120 pat_end = pat;
2121 while (NEXT_INSN (pat_end) != NULL_RTX)
2122 pat_end = NEXT_INSN (pat_end);
2124 /* If the last insn is a jump, insert EXPR in front [taking care to
2125 handle cc0, etc. properly]. Similarly we need to care trapping
2126 instructions in presence of non-call exceptions. */
2128 if (JUMP_P (insn)
2129 || (NONJUMP_INSN_P (insn)
2130 && (!single_succ_p (bb)
2131 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2133 #ifdef HAVE_cc0
2134 rtx note;
2135 #endif
2137 /* If this is a jump table, then we can't insert stuff here. Since
2138 we know the previous real insn must be the tablejump, we insert
2139 the new instruction just before the tablejump. */
2140 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2141 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2142 insn = prev_active_insn (insn);
2144 #ifdef HAVE_cc0
2145 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2146 if cc0 isn't set. */
2147 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2148 if (note)
2149 insn = XEXP (note, 0);
2150 else
2152 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2153 if (maybe_cc0_setter
2154 && INSN_P (maybe_cc0_setter)
2155 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2156 insn = maybe_cc0_setter;
2158 #endif
2159 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2160 new_insn = emit_insn_before_noloc (pat, insn, bb);
2163 /* Likewise if the last insn is a call, as will happen in the presence
2164 of exception handling. */
2165 else if (CALL_P (insn)
2166 && (!single_succ_p (bb)
2167 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2169 /* Keeping in mind targets with small register classes and parameters
2170 in registers, we search backward and place the instructions before
2171 the first parameter is loaded. Do this for everyone for consistency
2172 and a presumption that we'll get better code elsewhere as well. */
2174 /* Since different machines initialize their parameter registers
2175 in different orders, assume nothing. Collect the set of all
2176 parameter registers. */
2177 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2179 /* If we found all the parameter loads, then we want to insert
2180 before the first parameter load.
2182 If we did not find all the parameter loads, then we might have
2183 stopped on the head of the block, which could be a CODE_LABEL.
2184 If we inserted before the CODE_LABEL, then we would be putting
2185 the insn in the wrong basic block. In that case, put the insn
2186 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2187 while (LABEL_P (insn)
2188 || NOTE_INSN_BASIC_BLOCK_P (insn))
2189 insn = NEXT_INSN (insn);
2191 new_insn = emit_insn_before_noloc (pat, insn, bb);
2193 else
2194 new_insn = emit_insn_after_noloc (pat, insn, bb);
2196 while (1)
2198 if (INSN_P (pat))
2199 add_label_notes (PATTERN (pat), new_insn);
2200 if (pat == pat_end)
2201 break;
2202 pat = NEXT_INSN (pat);
2205 gcse_create_count++;
2207 if (dump_file)
2209 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2210 bb->index, INSN_UID (new_insn));
2211 fprintf (dump_file, "copying expression %d to reg %d\n",
2212 expr->bitmap_index, regno);
2216 /* Insert partially redundant expressions on edges in the CFG to make
2217 the expressions fully redundant. */
2219 static int
2220 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2222 int e, i, j, num_edges, set_size, did_insert = 0;
2223 sbitmap *inserted;
2225 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2226 if it reaches any of the deleted expressions. */
2228 set_size = pre_insert_map[0]->size;
2229 num_edges = NUM_EDGES (edge_list);
2230 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2231 sbitmap_vector_zero (inserted, num_edges);
2233 for (e = 0; e < num_edges; e++)
2235 int indx;
2236 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2238 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2240 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2242 for (j = indx;
2243 insert && j < (int) expr_hash_table.n_elems;
2244 j++, insert >>= 1)
2245 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2247 struct expr *expr = index_map[j];
2248 struct occr *occr;
2250 /* Now look at each deleted occurrence of this expression. */
2251 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2253 if (! occr->deleted_p)
2254 continue;
2256 /* Insert this expression on this edge if it would
2257 reach the deleted occurrence in BB. */
2258 if (!TEST_BIT (inserted[e], j))
2260 rtx insn;
2261 edge eg = INDEX_EDGE (edge_list, e);
2263 /* We can't insert anything on an abnormal and
2264 critical edge, so we insert the insn at the end of
2265 the previous block. There are several alternatives
2266 detailed in Morgans book P277 (sec 10.5) for
2267 handling this situation. This one is easiest for
2268 now. */
2270 if (eg->flags & EDGE_ABNORMAL)
2271 insert_insn_end_basic_block (index_map[j], bb);
2272 else
2274 insn = process_insert_insn (index_map[j]);
2275 insert_insn_on_edge (insn, eg);
2278 if (dump_file)
2280 fprintf (dump_file, "PRE: edge (%d,%d), ",
2281 bb->index,
2282 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2283 fprintf (dump_file, "copy expression %d\n",
2284 expr->bitmap_index);
2287 update_ld_motion_stores (expr);
2288 SET_BIT (inserted[e], j);
2289 did_insert = 1;
2290 gcse_create_count++;
2297 sbitmap_vector_free (inserted);
2298 return did_insert;
2301 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2302 Given "old_reg <- expr" (INSN), instead of adding after it
2303 reaching_reg <- old_reg
2304 it's better to do the following:
2305 reaching_reg <- expr
2306 old_reg <- reaching_reg
2307 because this way copy propagation can discover additional PRE
2308 opportunities. But if this fails, we try the old way.
2309 When "expr" is a store, i.e.
2310 given "MEM <- old_reg", instead of adding after it
2311 reaching_reg <- old_reg
2312 it's better to add it before as follows:
2313 reaching_reg <- old_reg
2314 MEM <- reaching_reg. */
2316 static void
2317 pre_insert_copy_insn (struct expr *expr, rtx insn)
2319 rtx reg = expr->reaching_reg;
2320 int regno = REGNO (reg);
2321 int indx = expr->bitmap_index;
2322 rtx pat = PATTERN (insn);
2323 rtx set, first_set, new_insn;
2324 rtx old_reg;
2325 int i;
2327 /* This block matches the logic in hash_scan_insn. */
2328 switch (GET_CODE (pat))
2330 case SET:
2331 set = pat;
2332 break;
2334 case PARALLEL:
2335 /* Search through the parallel looking for the set whose
2336 source was the expression that we're interested in. */
2337 first_set = NULL_RTX;
2338 set = NULL_RTX;
2339 for (i = 0; i < XVECLEN (pat, 0); i++)
2341 rtx x = XVECEXP (pat, 0, i);
2342 if (GET_CODE (x) == SET)
2344 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2345 may not find an equivalent expression, but in this
2346 case the PARALLEL will have a single set. */
2347 if (first_set == NULL_RTX)
2348 first_set = x;
2349 if (expr_equiv_p (SET_SRC (x), expr->expr))
2351 set = x;
2352 break;
2357 gcc_assert (first_set);
2358 if (set == NULL_RTX)
2359 set = first_set;
2360 break;
2362 default:
2363 gcc_unreachable ();
2366 if (REG_P (SET_DEST (set)))
2368 old_reg = SET_DEST (set);
2369 /* Check if we can modify the set destination in the original insn. */
2370 if (validate_change (insn, &SET_DEST (set), reg, 0))
2372 new_insn = gen_move_insn (old_reg, reg);
2373 new_insn = emit_insn_after (new_insn, insn);
2375 else
2377 new_insn = gen_move_insn (reg, old_reg);
2378 new_insn = emit_insn_after (new_insn, insn);
2381 else /* This is possible only in case of a store to memory. */
2383 old_reg = SET_SRC (set);
2384 new_insn = gen_move_insn (reg, old_reg);
2386 /* Check if we can modify the set source in the original insn. */
2387 if (validate_change (insn, &SET_SRC (set), reg, 0))
2388 new_insn = emit_insn_before (new_insn, insn);
2389 else
2390 new_insn = emit_insn_after (new_insn, insn);
2393 gcse_create_count++;
2395 if (dump_file)
2396 fprintf (dump_file,
2397 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2398 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2399 INSN_UID (insn), regno);
2402 /* Copy available expressions that reach the redundant expression
2403 to `reaching_reg'. */
2405 static void
2406 pre_insert_copies (void)
2408 unsigned int i, added_copy;
2409 struct expr *expr;
2410 struct occr *occr;
2411 struct occr *avail;
2413 /* For each available expression in the table, copy the result to
2414 `reaching_reg' if the expression reaches a deleted one.
2416 ??? The current algorithm is rather brute force.
2417 Need to do some profiling. */
2419 for (i = 0; i < expr_hash_table.size; i++)
2420 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2422 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2423 we don't want to insert a copy here because the expression may not
2424 really be redundant. So only insert an insn if the expression was
2425 deleted. This test also avoids further processing if the
2426 expression wasn't deleted anywhere. */
2427 if (expr->reaching_reg == NULL)
2428 continue;
2430 /* Set when we add a copy for that expression. */
2431 added_copy = 0;
2433 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2435 if (! occr->deleted_p)
2436 continue;
2438 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2440 rtx insn = avail->insn;
2442 /* No need to handle this one if handled already. */
2443 if (avail->copied_p)
2444 continue;
2446 /* Don't handle this one if it's a redundant one. */
2447 if (INSN_DELETED_P (insn))
2448 continue;
2450 /* Or if the expression doesn't reach the deleted one. */
2451 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2452 expr,
2453 BLOCK_FOR_INSN (occr->insn)))
2454 continue;
2456 added_copy = 1;
2458 /* Copy the result of avail to reaching_reg. */
2459 pre_insert_copy_insn (expr, insn);
2460 avail->copied_p = 1;
2464 if (added_copy)
2465 update_ld_motion_stores (expr);
2469 /* Emit move from SRC to DEST noting the equivalence with expression computed
2470 in INSN. */
2472 static rtx
2473 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2475 rtx new_rtx;
2476 rtx set = single_set (insn), set2;
2477 rtx note;
2478 rtx eqv;
2480 /* This should never fail since we're creating a reg->reg copy
2481 we've verified to be valid. */
2483 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2485 /* Note the equivalence for local CSE pass. */
2486 set2 = single_set (new_rtx);
2487 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2488 return new_rtx;
2489 if ((note = find_reg_equal_equiv_note (insn)))
2490 eqv = XEXP (note, 0);
2491 else
2492 eqv = SET_SRC (set);
2494 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2496 return new_rtx;
2499 /* Delete redundant computations.
2500 Deletion is done by changing the insn to copy the `reaching_reg' of
2501 the expression into the result of the SET. It is left to later passes
2502 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2504 Return nonzero if a change is made. */
2506 static int
2507 pre_delete (void)
2509 unsigned int i;
2510 int changed;
2511 struct expr *expr;
2512 struct occr *occr;
2514 changed = 0;
2515 for (i = 0; i < expr_hash_table.size; i++)
2516 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2518 int indx = expr->bitmap_index;
2520 /* We only need to search antic_occr since we require ANTLOC != 0. */
2521 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2523 rtx insn = occr->insn;
2524 rtx set;
2525 basic_block bb = BLOCK_FOR_INSN (insn);
2527 /* We only delete insns that have a single_set. */
2528 if (TEST_BIT (pre_delete_map[bb->index], indx)
2529 && (set = single_set (insn)) != 0
2530 && dbg_cnt (pre_insn))
2532 /* Create a pseudo-reg to store the result of reaching
2533 expressions into. Get the mode for the new pseudo from
2534 the mode of the original destination pseudo. */
2535 if (expr->reaching_reg == NULL)
2536 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2538 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2539 delete_insn (insn);
2540 occr->deleted_p = 1;
2541 changed = 1;
2542 gcse_subst_count++;
2544 if (dump_file)
2546 fprintf (dump_file,
2547 "PRE: redundant insn %d (expression %d) in ",
2548 INSN_UID (insn), indx);
2549 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2550 bb->index, REGNO (expr->reaching_reg));
2556 return changed;
2559 /* Perform GCSE optimizations using PRE.
2560 This is called by one_pre_gcse_pass after all the dataflow analysis
2561 has been done.
2563 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2564 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2565 Compiler Design and Implementation.
2567 ??? A new pseudo reg is created to hold the reaching expression. The nice
2568 thing about the classical approach is that it would try to use an existing
2569 reg. If the register can't be adequately optimized [i.e. we introduce
2570 reload problems], one could add a pass here to propagate the new register
2571 through the block.
2573 ??? We don't handle single sets in PARALLELs because we're [currently] not
2574 able to copy the rest of the parallel when we insert copies to create full
2575 redundancies from partial redundancies. However, there's no reason why we
2576 can't handle PARALLELs in the cases where there are no partial
2577 redundancies. */
2579 static int
2580 pre_gcse (struct edge_list *edge_list)
2582 unsigned int i;
2583 int did_insert, changed;
2584 struct expr **index_map;
2585 struct expr *expr;
2587 /* Compute a mapping from expression number (`bitmap_index') to
2588 hash table entry. */
2590 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2591 for (i = 0; i < expr_hash_table.size; i++)
2592 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2593 index_map[expr->bitmap_index] = expr;
2595 /* Delete the redundant insns first so that
2596 - we know what register to use for the new insns and for the other
2597 ones with reaching expressions
2598 - we know which insns are redundant when we go to create copies */
2600 changed = pre_delete ();
2601 did_insert = pre_edge_insert (edge_list, index_map);
2603 /* In other places with reaching expressions, copy the expression to the
2604 specially allocated pseudo-reg that reaches the redundant expr. */
2605 pre_insert_copies ();
2606 if (did_insert)
2608 commit_edge_insertions ();
2609 changed = 1;
2612 free (index_map);
2613 return changed;
2616 /* Top level routine to perform one PRE GCSE pass.
2618 Return nonzero if a change was made. */
2620 static int
2621 one_pre_gcse_pass (void)
2623 int changed = 0;
2625 gcse_subst_count = 0;
2626 gcse_create_count = 0;
2628 /* Return if there's nothing to do, or it is too expensive. */
2629 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2630 || is_too_expensive (_("PRE disabled")))
2631 return 0;
2633 /* We need alias. */
2634 init_alias_analysis ();
2636 bytes_used = 0;
2637 gcc_obstack_init (&gcse_obstack);
2638 alloc_gcse_mem ();
2640 alloc_hash_table (&expr_hash_table);
2641 add_noreturn_fake_exit_edges ();
2642 if (flag_gcse_lm)
2643 compute_ld_motion_mems ();
2645 compute_hash_table (&expr_hash_table);
2646 if (flag_gcse_lm)
2647 trim_ld_motion_mems ();
2648 if (dump_file)
2649 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2651 if (expr_hash_table.n_elems > 0)
2653 struct edge_list *edge_list;
2654 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2655 edge_list = compute_pre_data ();
2656 changed |= pre_gcse (edge_list);
2657 free_edge_list (edge_list);
2658 free_pre_mem ();
2661 if (flag_gcse_lm)
2662 free_ld_motion_mems ();
2663 remove_fake_exit_edges ();
2664 free_hash_table (&expr_hash_table);
2666 free_gcse_mem ();
2667 obstack_free (&gcse_obstack, NULL);
2669 /* We are finished with alias. */
2670 end_alias_analysis ();
2672 if (dump_file)
2674 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2675 current_function_name (), n_basic_blocks, bytes_used);
2676 fprintf (dump_file, "%d substs, %d insns created\n",
2677 gcse_subst_count, gcse_create_count);
2680 return changed;
2683 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2684 to INSN. If such notes are added to an insn which references a
2685 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2686 that note, because the following loop optimization pass requires
2687 them. */
2689 /* ??? If there was a jump optimization pass after gcse and before loop,
2690 then we would not need to do this here, because jump would add the
2691 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2693 static void
2694 add_label_notes (rtx x, rtx insn)
2696 enum rtx_code code = GET_CODE (x);
2697 int i, j;
2698 const char *fmt;
2700 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2702 /* This code used to ignore labels that referred to dispatch tables to
2703 avoid flow generating (slightly) worse code.
2705 We no longer ignore such label references (see LABEL_REF handling in
2706 mark_jump_label for additional information). */
2708 /* There's no reason for current users to emit jump-insns with
2709 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2710 notes. */
2711 gcc_assert (!JUMP_P (insn));
2712 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2714 if (LABEL_P (XEXP (x, 0)))
2715 LABEL_NUSES (XEXP (x, 0))++;
2717 return;
2720 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2722 if (fmt[i] == 'e')
2723 add_label_notes (XEXP (x, i), insn);
2724 else if (fmt[i] == 'E')
2725 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2726 add_label_notes (XVECEXP (x, i, j), insn);
2730 /* Code Hoisting variables and subroutines. */
2732 /* Very busy expressions. */
2733 static sbitmap *hoist_vbein;
2734 static sbitmap *hoist_vbeout;
2736 /* ??? We could compute post dominators and run this algorithm in
2737 reverse to perform tail merging, doing so would probably be
2738 more effective than the tail merging code in jump.c.
2740 It's unclear if tail merging could be run in parallel with
2741 code hoisting. It would be nice. */
2743 /* Allocate vars used for code hoisting analysis. */
2745 static void
2746 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2748 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2749 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2750 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2752 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2753 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2756 /* Free vars used for code hoisting analysis. */
2758 static void
2759 free_code_hoist_mem (void)
2761 sbitmap_vector_free (antloc);
2762 sbitmap_vector_free (transp);
2763 sbitmap_vector_free (comp);
2765 sbitmap_vector_free (hoist_vbein);
2766 sbitmap_vector_free (hoist_vbeout);
2768 free_dominance_info (CDI_DOMINATORS);
2771 /* Compute the very busy expressions at entry/exit from each block.
2773 An expression is very busy if all paths from a given point
2774 compute the expression. */
2776 static void
2777 compute_code_hoist_vbeinout (void)
2779 int changed, passes;
2780 basic_block bb;
2782 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2783 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2785 passes = 0;
2786 changed = 1;
2788 while (changed)
2790 changed = 0;
2792 /* We scan the blocks in the reverse order to speed up
2793 the convergence. */
2794 FOR_EACH_BB_REVERSE (bb)
2796 if (bb->next_bb != EXIT_BLOCK_PTR)
2798 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2799 hoist_vbein, bb->index);
2801 /* Include expressions in VBEout that are calculated
2802 in BB and available at its end. */
2803 sbitmap_a_or_b (hoist_vbeout[bb->index],
2804 hoist_vbeout[bb->index], comp[bb->index]);
2807 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2808 antloc[bb->index],
2809 hoist_vbeout[bb->index],
2810 transp[bb->index]);
2813 passes++;
2816 if (dump_file)
2818 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2820 FOR_EACH_BB (bb)
2822 fprintf (dump_file, "vbein (%d): ", bb->index);
2823 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2824 fprintf (dump_file, "vbeout(%d): ", bb->index);
2825 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2830 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2832 static void
2833 compute_code_hoist_data (void)
2835 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2836 prune_expressions (false);
2837 compute_code_hoist_vbeinout ();
2838 calculate_dominance_info (CDI_DOMINATORS);
2839 if (dump_file)
2840 fprintf (dump_file, "\n");
2843 /* Determine if the expression identified by EXPR_INDEX would
2844 reach BB unimpared if it was placed at the end of EXPR_BB.
2845 Stop the search if the expression would need to be moved more
2846 than DISTANCE instructions.
2848 It's unclear exactly what Muchnick meant by "unimpared". It seems
2849 to me that the expression must either be computed or transparent in
2850 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2851 would allow the expression to be hoisted out of loops, even if
2852 the expression wasn't a loop invariant.
2854 Contrast this to reachability for PRE where an expression is
2855 considered reachable if *any* path reaches instead of *all*
2856 paths. */
2858 static int
2859 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2860 char *visited, int distance, int *bb_size)
2862 edge pred;
2863 edge_iterator ei;
2864 int visited_allocated_locally = 0;
2866 /* Terminate the search if distance, for which EXPR is allowed to move,
2867 is exhausted. */
2868 if (distance > 0)
2870 distance -= bb_size[bb->index];
2872 if (distance <= 0)
2873 return 0;
2875 else
2876 gcc_assert (distance == 0);
2878 if (visited == NULL)
2880 visited_allocated_locally = 1;
2881 visited = XCNEWVEC (char, last_basic_block);
2884 FOR_EACH_EDGE (pred, ei, bb->preds)
2886 basic_block pred_bb = pred->src;
2888 if (pred->src == ENTRY_BLOCK_PTR)
2889 break;
2890 else if (pred_bb == expr_bb)
2891 continue;
2892 else if (visited[pred_bb->index])
2893 continue;
2895 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2896 break;
2898 /* Not killed. */
2899 else
2901 visited[pred_bb->index] = 1;
2902 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2903 visited, distance, bb_size))
2904 break;
2907 if (visited_allocated_locally)
2908 free (visited);
2910 return (pred == NULL);
2913 /* Find occurence in BB. */
2915 static struct occr *
2916 find_occr_in_bb (struct occr *occr, basic_block bb)
2918 /* Find the right occurrence of this expression. */
2919 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2920 occr = occr->next;
2922 return occr;
2925 /* Actually perform code hoisting. */
2927 static int
2928 hoist_code (void)
2930 basic_block bb, dominated;
2931 VEC (basic_block, heap) *dom_tree_walk;
2932 unsigned int dom_tree_walk_index;
2933 VEC (basic_block, heap) *domby;
2934 unsigned int i,j;
2935 struct expr **index_map;
2936 struct expr *expr;
2937 int *to_bb_head;
2938 int *bb_size;
2939 int changed = 0;
2941 /* Compute a mapping from expression number (`bitmap_index') to
2942 hash table entry. */
2944 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2945 for (i = 0; i < expr_hash_table.size; i++)
2946 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2947 index_map[expr->bitmap_index] = expr;
2949 /* Calculate sizes of basic blocks and note how far
2950 each instruction is from the start of its block. We then use this
2951 data to restrict distance an expression can travel. */
2953 to_bb_head = XCNEWVEC (int, get_max_uid ());
2954 bb_size = XCNEWVEC (int, last_basic_block);
2956 FOR_EACH_BB (bb)
2958 rtx insn;
2959 int to_head;
2961 to_head = 0;
2962 FOR_BB_INSNS (bb, insn)
2964 /* Don't count debug instructions to avoid them affecting
2965 decision choices. */
2966 if (NONDEBUG_INSN_P (insn))
2967 to_bb_head[INSN_UID (insn)] = to_head++;
2970 bb_size[bb->index] = to_head;
2973 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2974 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2975 == ENTRY_BLOCK_PTR->next_bb));
2977 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2978 ENTRY_BLOCK_PTR->next_bb);
2980 /* Walk over each basic block looking for potentially hoistable
2981 expressions, nothing gets hoisted from the entry block. */
2982 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2984 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2986 if (VEC_length (basic_block, domby) == 0)
2987 continue;
2989 /* Examine each expression that is very busy at the exit of this
2990 block. These are the potentially hoistable expressions. */
2991 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
2993 if (TEST_BIT (hoist_vbeout[bb->index], i))
2995 /* Current expression. */
2996 struct expr *expr = index_map[i];
2997 /* Number of occurences of EXPR that can be hoisted to BB. */
2998 int hoistable = 0;
2999 /* Basic blocks that have occurences reachable from BB. */
3000 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
3001 /* Occurences reachable from BB. */
3002 VEC (occr_t, heap) *occrs_to_hoist = NULL;
3003 /* We want to insert the expression into BB only once, so
3004 note when we've inserted it. */
3005 int insn_inserted_p;
3006 occr_t occr;
3008 bitmap_initialize (from_bbs, 0);
3010 /* If an expression is computed in BB and is available at end of
3011 BB, hoist all occurences dominated by BB to BB. */
3012 if (TEST_BIT (comp[bb->index], i))
3014 occr = find_occr_in_bb (expr->antic_occr, bb);
3016 if (occr)
3018 /* An occurence might've been already deleted
3019 while processing a dominator of BB. */
3020 if (!occr->deleted_p)
3022 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3023 hoistable++;
3026 else
3027 hoistable++;
3030 /* We've found a potentially hoistable expression, now
3031 we look at every block BB dominates to see if it
3032 computes the expression. */
3033 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3035 int max_distance;
3037 /* Ignore self dominance. */
3038 if (bb == dominated)
3039 continue;
3040 /* We've found a dominated block, now see if it computes
3041 the busy expression and whether or not moving that
3042 expression to the "beginning" of that block is safe. */
3043 if (!TEST_BIT (antloc[dominated->index], i))
3044 continue;
3046 occr = find_occr_in_bb (expr->antic_occr, dominated);
3047 gcc_assert (occr);
3049 /* An occurence might've been already deleted
3050 while processing a dominator of BB. */
3051 if (occr->deleted_p)
3052 continue;
3053 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3055 max_distance = expr->max_distance;
3056 if (max_distance > 0)
3057 /* Adjust MAX_DISTANCE to account for the fact that
3058 OCCR won't have to travel all of DOMINATED, but
3059 only part of it. */
3060 max_distance += (bb_size[dominated->index]
3061 - to_bb_head[INSN_UID (occr->insn)]);
3063 /* Note if the expression would reach the dominated block
3064 unimpared if it was placed at the end of BB.
3066 Keep track of how many times this expression is hoistable
3067 from a dominated block into BB. */
3068 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3069 max_distance, bb_size))
3071 hoistable++;
3072 VEC_safe_push (occr_t, heap,
3073 occrs_to_hoist, occr);
3074 bitmap_set_bit (from_bbs, dominated->index);
3078 /* If we found more than one hoistable occurrence of this
3079 expression, then note it in the vector of expressions to
3080 hoist. It makes no sense to hoist things which are computed
3081 in only one BB, and doing so tends to pessimize register
3082 allocation. One could increase this value to try harder
3083 to avoid any possible code expansion due to register
3084 allocation issues; however experiments have shown that
3085 the vast majority of hoistable expressions are only movable
3086 from two successors, so raising this threshold is likely
3087 to nullify any benefit we get from code hoisting. */
3088 if (hoistable > 1 && dbg_cnt (hoist_insn))
3090 /* If (hoistable != VEC_length), then there is
3091 an occurence of EXPR in BB itself. Don't waste
3092 time looking for LCA in this case. */
3093 if ((unsigned) hoistable
3094 == VEC_length (occr_t, occrs_to_hoist))
3096 basic_block lca;
3098 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3099 from_bbs);
3100 if (lca != bb)
3101 /* Punt, it's better to hoist these occurences to
3102 LCA. */
3103 VEC_free (occr_t, heap, occrs_to_hoist);
3106 else
3107 /* Punt, no point hoisting a single occurence. */
3108 VEC_free (occr_t, heap, occrs_to_hoist);
3110 insn_inserted_p = 0;
3112 /* Walk through occurences of I'th expressions we want
3113 to hoist to BB and make the transformations. */
3114 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3116 rtx insn;
3117 rtx set;
3119 gcc_assert (!occr->deleted_p);
3121 insn = occr->insn;
3122 set = single_set (insn);
3123 gcc_assert (set);
3125 /* Create a pseudo-reg to store the result of reaching
3126 expressions into. Get the mode for the new pseudo
3127 from the mode of the original destination pseudo.
3129 It is important to use new pseudos whenever we
3130 emit a set. This will allow reload to use
3131 rematerialization for such registers. */
3132 if (!insn_inserted_p)
3133 expr->reaching_reg
3134 = gen_reg_rtx_and_attrs (SET_DEST (set));
3136 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3137 insn);
3138 delete_insn (insn);
3139 occr->deleted_p = 1;
3140 changed = 1;
3141 gcse_subst_count++;
3143 if (!insn_inserted_p)
3145 insert_insn_end_basic_block (expr, bb);
3146 insn_inserted_p = 1;
3150 VEC_free (occr_t, heap, occrs_to_hoist);
3151 bitmap_clear (from_bbs);
3154 VEC_free (basic_block, heap, domby);
3157 VEC_free (basic_block, heap, dom_tree_walk);
3158 free (bb_size);
3159 free (to_bb_head);
3160 free (index_map);
3162 return changed;
3165 /* Top level routine to perform one code hoisting (aka unification) pass
3167 Return nonzero if a change was made. */
3169 static int
3170 one_code_hoisting_pass (void)
3172 int changed = 0;
3174 gcse_subst_count = 0;
3175 gcse_create_count = 0;
3177 /* Return if there's nothing to do, or it is too expensive. */
3178 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3179 || is_too_expensive (_("GCSE disabled")))
3180 return 0;
3182 doing_code_hoisting_p = true;
3184 /* We need alias. */
3185 init_alias_analysis ();
3187 bytes_used = 0;
3188 gcc_obstack_init (&gcse_obstack);
3189 alloc_gcse_mem ();
3191 alloc_hash_table (&expr_hash_table);
3192 compute_hash_table (&expr_hash_table);
3193 if (dump_file)
3194 dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3196 if (expr_hash_table.n_elems > 0)
3198 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3199 compute_code_hoist_data ();
3200 changed = hoist_code ();
3201 free_code_hoist_mem ();
3204 free_hash_table (&expr_hash_table);
3205 free_gcse_mem ();
3206 obstack_free (&gcse_obstack, NULL);
3208 /* We are finished with alias. */
3209 end_alias_analysis ();
3211 if (dump_file)
3213 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3214 current_function_name (), n_basic_blocks, bytes_used);
3215 fprintf (dump_file, "%d substs, %d insns created\n",
3216 gcse_subst_count, gcse_create_count);
3219 doing_code_hoisting_p = false;
3221 return changed;
3224 /* Here we provide the things required to do store motion towards the exit.
3225 In order for this to be effective, gcse also needed to be taught how to
3226 move a load when it is killed only by a store to itself.
3228 int i;
3229 float a[10];
3231 void foo(float scale)
3233 for (i=0; i<10; i++)
3234 a[i] *= scale;
3237 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3238 the load out since its live around the loop, and stored at the bottom
3239 of the loop.
3241 The 'Load Motion' referred to and implemented in this file is
3242 an enhancement to gcse which when using edge based LCM, recognizes
3243 this situation and allows gcse to move the load out of the loop.
3245 Once gcse has hoisted the load, store motion can then push this
3246 load towards the exit, and we end up with no loads or stores of 'i'
3247 in the loop. */
3249 static hashval_t
3250 pre_ldst_expr_hash (const void *p)
3252 int do_not_record_p = 0;
3253 const struct ls_expr *const x = (const struct ls_expr *) p;
3254 return
3255 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3258 static int
3259 pre_ldst_expr_eq (const void *p1, const void *p2)
3261 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3262 *const ptr2 = (const struct ls_expr *) p2;
3263 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3266 /* This will search the ldst list for a matching expression. If it
3267 doesn't find one, we create one and initialize it. */
3269 static struct ls_expr *
3270 ldst_entry (rtx x)
3272 int do_not_record_p = 0;
3273 struct ls_expr * ptr;
3274 unsigned int hash;
3275 void **slot;
3276 struct ls_expr e;
3278 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3279 NULL, /*have_reg_qty=*/false);
3281 e.pattern = x;
3282 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3283 if (*slot)
3284 return (struct ls_expr *)*slot;
3286 ptr = XNEW (struct ls_expr);
3288 ptr->next = pre_ldst_mems;
3289 ptr->expr = NULL;
3290 ptr->pattern = x;
3291 ptr->pattern_regs = NULL_RTX;
3292 ptr->loads = NULL_RTX;
3293 ptr->stores = NULL_RTX;
3294 ptr->reaching_reg = NULL_RTX;
3295 ptr->invalid = 0;
3296 ptr->index = 0;
3297 ptr->hash_index = hash;
3298 pre_ldst_mems = ptr;
3299 *slot = ptr;
3301 return ptr;
3304 /* Free up an individual ldst entry. */
3306 static void
3307 free_ldst_entry (struct ls_expr * ptr)
3309 free_INSN_LIST_list (& ptr->loads);
3310 free_INSN_LIST_list (& ptr->stores);
3312 free (ptr);
3315 /* Free up all memory associated with the ldst list. */
3317 static void
3318 free_ld_motion_mems (void)
3320 if (pre_ldst_table)
3321 htab_delete (pre_ldst_table);
3322 pre_ldst_table = NULL;
3324 while (pre_ldst_mems)
3326 struct ls_expr * tmp = pre_ldst_mems;
3328 pre_ldst_mems = pre_ldst_mems->next;
3330 free_ldst_entry (tmp);
3333 pre_ldst_mems = NULL;
3336 /* Dump debugging info about the ldst list. */
3338 static void
3339 print_ldst_list (FILE * file)
3341 struct ls_expr * ptr;
3343 fprintf (file, "LDST list: \n");
3345 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3347 fprintf (file, " Pattern (%3d): ", ptr->index);
3349 print_rtl (file, ptr->pattern);
3351 fprintf (file, "\n Loads : ");
3353 if (ptr->loads)
3354 print_rtl (file, ptr->loads);
3355 else
3356 fprintf (file, "(nil)");
3358 fprintf (file, "\n Stores : ");
3360 if (ptr->stores)
3361 print_rtl (file, ptr->stores);
3362 else
3363 fprintf (file, "(nil)");
3365 fprintf (file, "\n\n");
3368 fprintf (file, "\n");
3371 /* Returns 1 if X is in the list of ldst only expressions. */
3373 static struct ls_expr *
3374 find_rtx_in_ldst (rtx x)
3376 struct ls_expr e;
3377 void **slot;
3378 if (!pre_ldst_table)
3379 return NULL;
3380 e.pattern = x;
3381 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3382 if (!slot || ((struct ls_expr *)*slot)->invalid)
3383 return NULL;
3384 return (struct ls_expr *) *slot;
3387 /* Load Motion for loads which only kill themselves. */
3389 /* Return true if x, a MEM, is a simple access with no side effects.
3390 These are the types of loads we consider for the ld_motion list,
3391 otherwise we let the usual aliasing take care of it. */
3393 static int
3394 simple_mem (const_rtx x)
3396 if (MEM_VOLATILE_P (x))
3397 return 0;
3399 if (GET_MODE (x) == BLKmode)
3400 return 0;
3402 /* If we are handling exceptions, we must be careful with memory references
3403 that may trap. If we are not, the behavior is undefined, so we may just
3404 continue. */
3405 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3406 return 0;
3408 if (side_effects_p (x))
3409 return 0;
3411 /* Do not consider function arguments passed on stack. */
3412 if (reg_mentioned_p (stack_pointer_rtx, x))
3413 return 0;
3415 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3416 return 0;
3418 return 1;
3421 /* Make sure there isn't a buried reference in this pattern anywhere.
3422 If there is, invalidate the entry for it since we're not capable
3423 of fixing it up just yet.. We have to be sure we know about ALL
3424 loads since the aliasing code will allow all entries in the
3425 ld_motion list to not-alias itself. If we miss a load, we will get
3426 the wrong value since gcse might common it and we won't know to
3427 fix it up. */
3429 static void
3430 invalidate_any_buried_refs (rtx x)
3432 const char * fmt;
3433 int i, j;
3434 struct ls_expr * ptr;
3436 /* Invalidate it in the list. */
3437 if (MEM_P (x) && simple_mem (x))
3439 ptr = ldst_entry (x);
3440 ptr->invalid = 1;
3443 /* Recursively process the insn. */
3444 fmt = GET_RTX_FORMAT (GET_CODE (x));
3446 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3448 if (fmt[i] == 'e')
3449 invalidate_any_buried_refs (XEXP (x, i));
3450 else if (fmt[i] == 'E')
3451 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3452 invalidate_any_buried_refs (XVECEXP (x, i, j));
3456 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3457 being defined as MEM loads and stores to symbols, with no side effects
3458 and no registers in the expression. For a MEM destination, we also
3459 check that the insn is still valid if we replace the destination with a
3460 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3461 which don't match this criteria, they are invalidated and trimmed out
3462 later. */
3464 static void
3465 compute_ld_motion_mems (void)
3467 struct ls_expr * ptr;
3468 basic_block bb;
3469 rtx insn;
3471 pre_ldst_mems = NULL;
3472 pre_ldst_table
3473 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3475 FOR_EACH_BB (bb)
3477 FOR_BB_INSNS (bb, insn)
3479 if (NONDEBUG_INSN_P (insn))
3481 if (GET_CODE (PATTERN (insn)) == SET)
3483 rtx src = SET_SRC (PATTERN (insn));
3484 rtx dest = SET_DEST (PATTERN (insn));
3486 /* Check for a simple LOAD... */
3487 if (MEM_P (src) && simple_mem (src))
3489 ptr = ldst_entry (src);
3490 if (REG_P (dest))
3491 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3492 else
3493 ptr->invalid = 1;
3495 else
3497 /* Make sure there isn't a buried load somewhere. */
3498 invalidate_any_buried_refs (src);
3501 /* Check for stores. Don't worry about aliased ones, they
3502 will block any movement we might do later. We only care
3503 about this exact pattern since those are the only
3504 circumstance that we will ignore the aliasing info. */
3505 if (MEM_P (dest) && simple_mem (dest))
3507 ptr = ldst_entry (dest);
3509 if (! MEM_P (src)
3510 && GET_CODE (src) != ASM_OPERANDS
3511 /* Check for REG manually since want_to_gcse_p
3512 returns 0 for all REGs. */
3513 && can_assign_to_reg_without_clobbers_p (src))
3514 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3515 else
3516 ptr->invalid = 1;
3519 else
3520 invalidate_any_buried_refs (PATTERN (insn));
3526 /* Remove any references that have been either invalidated or are not in the
3527 expression list for pre gcse. */
3529 static void
3530 trim_ld_motion_mems (void)
3532 struct ls_expr * * last = & pre_ldst_mems;
3533 struct ls_expr * ptr = pre_ldst_mems;
3535 while (ptr != NULL)
3537 struct expr * expr;
3539 /* Delete if entry has been made invalid. */
3540 if (! ptr->invalid)
3542 /* Delete if we cannot find this mem in the expression list. */
3543 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3545 for (expr = expr_hash_table.table[hash];
3546 expr != NULL;
3547 expr = expr->next_same_hash)
3548 if (expr_equiv_p (expr->expr, ptr->pattern))
3549 break;
3551 else
3552 expr = (struct expr *) 0;
3554 if (expr)
3556 /* Set the expression field if we are keeping it. */
3557 ptr->expr = expr;
3558 last = & ptr->next;
3559 ptr = ptr->next;
3561 else
3563 *last = ptr->next;
3564 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3565 free_ldst_entry (ptr);
3566 ptr = * last;
3570 /* Show the world what we've found. */
3571 if (dump_file && pre_ldst_mems != NULL)
3572 print_ldst_list (dump_file);
3575 /* This routine will take an expression which we are replacing with
3576 a reaching register, and update any stores that are needed if
3577 that expression is in the ld_motion list. Stores are updated by
3578 copying their SRC to the reaching register, and then storing
3579 the reaching register into the store location. These keeps the
3580 correct value in the reaching register for the loads. */
3582 static void
3583 update_ld_motion_stores (struct expr * expr)
3585 struct ls_expr * mem_ptr;
3587 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3589 /* We can try to find just the REACHED stores, but is shouldn't
3590 matter to set the reaching reg everywhere... some might be
3591 dead and should be eliminated later. */
3593 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3594 where reg is the reaching reg used in the load. We checked in
3595 compute_ld_motion_mems that we can replace (set mem expr) with
3596 (set reg expr) in that insn. */
3597 rtx list = mem_ptr->stores;
3599 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3601 rtx insn = XEXP (list, 0);
3602 rtx pat = PATTERN (insn);
3603 rtx src = SET_SRC (pat);
3604 rtx reg = expr->reaching_reg;
3605 rtx copy;
3607 /* If we've already copied it, continue. */
3608 if (expr->reaching_reg == src)
3609 continue;
3611 if (dump_file)
3613 fprintf (dump_file, "PRE: store updated with reaching reg ");
3614 print_rtl (dump_file, reg);
3615 fprintf (dump_file, ":\n ");
3616 print_inline_rtx (dump_file, insn, 8);
3617 fprintf (dump_file, "\n");
3620 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3621 emit_insn_before (copy, insn);
3622 SET_SRC (pat) = reg;
3623 df_insn_rescan (insn);
3625 /* un-recognize this pattern since it's probably different now. */
3626 INSN_CODE (insn) = -1;
3627 gcse_create_count++;
3632 /* Return true if the graph is too expensive to optimize. PASS is the
3633 optimization about to be performed. */
3635 static bool
3636 is_too_expensive (const char *pass)
3638 /* Trying to perform global optimizations on flow graphs which have
3639 a high connectivity will take a long time and is unlikely to be
3640 particularly useful.
3642 In normal circumstances a cfg should have about twice as many
3643 edges as blocks. But we do not want to punish small functions
3644 which have a couple switch statements. Rather than simply
3645 threshold the number of blocks, uses something with a more
3646 graceful degradation. */
3647 if (n_edges > 20000 + n_basic_blocks * 4)
3649 warning (OPT_Wdisabled_optimization,
3650 "%s: %d basic blocks and %d edges/basic block",
3651 pass, n_basic_blocks, n_edges / n_basic_blocks);
3653 return true;
3656 /* If allocating memory for the dataflow bitmaps would take up too much
3657 storage it's better just to disable the optimization. */
3658 if ((n_basic_blocks
3659 * SBITMAP_SET_SIZE (max_reg_num ())
3660 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3662 warning (OPT_Wdisabled_optimization,
3663 "%s: %d basic blocks and %d registers",
3664 pass, n_basic_blocks, max_reg_num ());
3666 return true;
3669 return false;
3672 /* All the passes implemented in this file. Each pass has its
3673 own gate and execute function, and at the end of the file a
3674 pass definition for passes.c.
3676 We do not construct an accurate cfg in functions which call
3677 setjmp, so none of these passes runs if the function calls
3678 setjmp.
3679 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3681 static bool
3682 gate_rtl_pre (void)
3684 return optimize > 0 && flag_gcse
3685 && !cfun->calls_setjmp
3686 && optimize_function_for_speed_p (cfun)
3687 && dbg_cnt (pre);
3690 static unsigned int
3691 execute_rtl_pre (void)
3693 int changed;
3694 delete_unreachable_blocks ();
3695 df_analyze ();
3696 changed = one_pre_gcse_pass ();
3697 flag_rerun_cse_after_global_opts |= changed;
3698 if (changed)
3699 cleanup_cfg (0);
3700 return 0;
3703 static bool
3704 gate_rtl_hoist (void)
3706 return optimize > 0 && flag_gcse
3707 && !cfun->calls_setjmp
3708 /* It does not make sense to run code hoisting unless we are optimizing
3709 for code size -- it rarely makes programs faster, and can make then
3710 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3711 && optimize_function_for_size_p (cfun)
3712 && dbg_cnt (hoist);
3715 static unsigned int
3716 execute_rtl_hoist (void)
3718 int changed;
3719 delete_unreachable_blocks ();
3720 df_analyze ();
3721 changed = one_code_hoisting_pass ();
3722 flag_rerun_cse_after_global_opts |= changed;
3723 if (changed)
3724 cleanup_cfg (0);
3725 return 0;
3728 struct rtl_opt_pass pass_rtl_pre =
3731 RTL_PASS,
3732 "rtl pre", /* name */
3733 gate_rtl_pre, /* gate */
3734 execute_rtl_pre, /* execute */
3735 NULL, /* sub */
3736 NULL, /* next */
3737 0, /* static_pass_number */
3738 TV_PRE, /* tv_id */
3739 PROP_cfglayout, /* properties_required */
3740 0, /* properties_provided */
3741 0, /* properties_destroyed */
3742 0, /* todo_flags_start */
3743 TODO_df_finish | TODO_verify_rtl_sharing |
3744 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3748 struct rtl_opt_pass pass_rtl_hoist =
3751 RTL_PASS,
3752 "hoist", /* name */
3753 gate_rtl_hoist, /* gate */
3754 execute_rtl_hoist, /* execute */
3755 NULL, /* sub */
3756 NULL, /* next */
3757 0, /* static_pass_number */
3758 TV_HOIST, /* tv_id */
3759 PROP_cfglayout, /* properties_required */
3760 0, /* properties_provided */
3761 0, /* properties_destroyed */
3762 0, /* todo_flags_start */
3763 TODO_df_finish | TODO_verify_rtl_sharing |
3764 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3768 #include "gt-gcse.h"