1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
28 /* References searched while implementing this.
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
What's In a Region? Or Computing Control Dependence Regions in Near-Linear
Time for Reducible Control Flow
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "output.h"
154 #include "function.h"
155 #include "expr.h"
156 #include "except.h"
157 #include "ggc.h"
158 #include "params.h"
159 #include "cselib.h"
160 #include "intl.h"
161 #include "obstack.h"
162 #include "timevar.h"
163 #include "tree-pass.h"
164 #include "hashtab.h"
165 #include "df.h"
166 #include "dbgcnt.h"
167 #include "target.h"
168 #include "gcse.h"
169 #include "cfgloop.h"
171 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
172 are a superset of those done by classic GCSE.
174 Two passes of copy/constant propagation are done around PRE or hoisting
175 because the first one enables more GCSE and the second one helps to clean
176 up the copies that PRE and HOIST create. This is needed more for PRE than
177 for HOIST because code hoisting will try to use an existing register
178 containing the common subexpression rather than create a new one. This is
179 harder to do for PRE because of the code motion (which HOIST doesn't do).
181 Expressions we are interested in GCSE-ing are of the form
182 (set (pseudo-reg) (expression)).
183 Function want_to_gcse_p says what these are.
185 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
186 This allows PRE to hoist expressions that are expressed in multiple insns,
187 such as complex address calculations (e.g. for PIC code, or loads with a
188 high part and a low part).
190 PRE handles moving invariant expressions out of loops (by treating them as
191 partially redundant).
193 **********************
195 We used to support multiple passes but there are diminishing returns in
196 doing so. The first pass usually makes 90% of the changes that are doable.
197 A second pass can make a few more changes made possible by the first pass.
198 Experiments show any further passes don't make enough changes to justify
199 the expense.
201 A study of spec92 using an unlimited number of passes:
202 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
203 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
204 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
It was found that doing copy propagation between each pass enables further
207 substitutions.
209 This study was done before expressions in REG_EQUAL notes were added as
210 candidate expressions for optimization, and before the GIMPLE optimizers
were added.  Probably, multiple passes are even less efficient now than
212 at the time when the study was conducted.
214 PRE is quite expensive in complicated functions because the DFA can take
215 a while to converge. Hence we only perform one pass.
217 **********************
219 The steps for PRE are:
221 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
223 2) Perform the data flow analysis for PRE.
225 3) Delete the redundant instructions
227 4) Insert the required copies [if any] that make the partially
228 redundant instructions fully redundant.
230 5) For other reaching expressions, insert an instruction to copy the value
231 to a newly created pseudo that will reach the redundant instruction.
233 The deletion is done first so that when we do insertions we
234 know which pseudo reg to use.
236 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
237 argue it is not. The number of iterations for the algorithm to converge
238 is typically 2-4 so I don't view it as that expensive (relatively speaking).
240 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
241 we create. To make an expression reach the place where it's redundant,
242 the result of the expression is copied to a new register, and the redundant
243 expression is deleted by replacing it with this new register. Classic GCSE
244 doesn't have this problem as much as it computes the reaching defs of
245 each register in each block and thus can try to use an existing
246 register. */
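/* An illustrative sketch only, not the actual body of one_pre_gcse_pass
   (which also handles statistics and load/store motion bookkeeping), of
   how the steps above map onto the helpers declared below.  CHANGED is
   just a local flag of this sketch:

     alloc_hash_table (&expr_hash_table);
     compute_hash_table (&expr_hash_table);          [step 1]
     if (expr_hash_table.n_elems > 0)
       {
         struct edge_list *edge_list;
         alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
         edge_list = compute_pre_data ();            [step 2]
         changed |= pre_gcse (edge_list);            [steps 3, 4 and 5]
         free_edge_list (edge_list);
         free_pre_mem ();
       }
     free_hash_table (&expr_hash_table);  */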
248 /* GCSE global vars. */
250 struct target_gcse default_target_gcse;
251 #if SWITCHABLE_TARGET
252 struct target_gcse *this_target_gcse = &default_target_gcse;
253 #endif
255 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
256 int flag_rerun_cse_after_global_opts;
258 /* An obstack for our working variables. */
259 static struct obstack gcse_obstack;
261 struct reg_use {rtx reg_rtx; };
263 /* Hash table of expressions. */
struct expr
{
267 /* The expression. */
268 rtx expr;
269 /* Index in the available expression bitmaps. */
270 int bitmap_index;
271 /* Next entry with the same hash. */
272 struct expr *next_same_hash;
273 /* List of anticipatable occurrences in basic blocks in the function.
274 An "anticipatable occurrence" is one that is the first occurrence in the
275 basic block, the operands are not modified in the basic block prior
276 to the occurrence and the output is not used between the start of
277 the block and the occurrence. */
278 struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
280 An "available occurrence" is one that is the last occurrence in the
281 basic block and the operands are not modified by following statements in
282 the basic block [including this insn]. */
283 struct occr *avail_occr;
284 /* Non-null if the computation is PRE redundant.
285 The value is the newly created pseudo-reg to record a copy of the
286 expression in all the places that reach the redundant copy. */
287 rtx reaching_reg;
288 /* Maximum distance in instructions this expression can travel.
289 We avoid moving simple expressions for more than a few instructions
290 to keep register pressure under control.
291 A value of "0" removes restrictions on how far the expression can
292 travel. */
  int max_distance;
};
296 /* Occurrence of an expression.
297 There is one per basic block. If a pattern appears more than once the
298 last appearance is used [or first for anticipatable expressions]. */
struct occr
{
302 /* Next occurrence of this expression. */
303 struct occr *next;
304 /* The insn that computes the expression. */
305 rtx insn;
306 /* Nonzero if this [anticipatable] occurrence has been deleted. */
307 char deleted_p;
308 /* Nonzero if this [available] occurrence has been copied to
309 reaching_reg. */
310 /* ??? This is mutually exclusive with deleted_p, so they could share
311 the same byte. */
  char copied_p;
};
315 typedef struct occr *occr_t;
316 DEF_VEC_P (occr_t);
317 DEF_VEC_ALLOC_P (occr_t, heap);
319 /* Expression hash tables.
320 Each hash table is an array of buckets.
321 ??? It is known that if it were an array of entries, structure elements
322 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
323 not clear whether in the final analysis a sufficient amount of memory would
324 be saved as the size of the available expression bitmaps would be larger
325 [one could build a mapping table without holes afterwards though].
326 Someday I'll perform the computation and figure it out. */
struct hash_table_d
{
330 /* The table itself.
331 This is an array of `expr_hash_table_size' elements. */
332 struct expr **table;
334 /* Size of the hash table, in elements. */
335 unsigned int size;
337 /* Number of hash table elements. */
  unsigned int n_elems;
};
341 /* Expression hash table. */
342 static struct hash_table_d expr_hash_table;
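/* A typical walk over every expression in the table, as done by
   compute_local_properties and dump_hash_table below (sketch):

     unsigned int i;
     struct expr *expr;

     for (i = 0; i < expr_hash_table.size; i++)
       for (expr = expr_hash_table.table[i];
            expr != NULL;
            expr = expr->next_same_hash)
         ... use expr->bitmap_index to index the per-block bitmaps ...  */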
344 /* This is a list of expressions which are MEMs and will be used by load
345 or store motion.
346 Load motion tracks MEMs which aren't killed by anything except itself,
347 i.e. loads and stores to a single location.
348 We can then allow movement of these MEM refs with a little special
349 allowance. (all stores copy the same value to the reaching reg used
350 for the loads). This means all values used to store into memory must have
351 no side effects so we can re-issue the setter value. */
struct ls_expr
{
355 struct expr * expr; /* Gcse expression reference for LM. */
356 rtx pattern; /* Pattern of this mem. */
357 rtx pattern_regs; /* List of registers mentioned by the mem. */
358 rtx loads; /* INSN list of loads seen. */
359 rtx stores; /* INSN list of stores seen. */
360 struct ls_expr * next; /* Next in the list. */
361 int invalid; /* Invalid for some reason. */
362 int index; /* If it maps to a bitmap index. */
363 unsigned int hash_index; /* Index when in a hash table. */
  rtx reaching_reg;            /* Register to use when re-writing. */
};
367 /* Head of the list of load/store memory refs. */
368 static struct ls_expr * pre_ldst_mems = NULL;
370 /* Hashtable for the load/store memory refs. */
371 static htab_t pre_ldst_table = NULL;
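/* A typical walk over this list; the load/store motion helpers declared
   below iterate it through the NEXT field (sketch):

     struct ls_expr *ptr;

     for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
       if (! ptr->invalid)
         ... examine ptr->pattern, ptr->loads and ptr->stores ...  */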
373 /* Bitmap containing one bit for each register in the program.
374 Used when performing GCSE to track which registers have been set since
375 the start of the basic block. */
376 static regset reg_set_bitmap;
378 /* Array, indexed by basic block number for a list of insns which modify
379 memory within that block. */
380 static VEC (rtx,heap) **modify_mem_list;
381 static bitmap modify_mem_list_set;
typedef struct modify_pair_s
{
385 rtx dest; /* A MEM. */
386 rtx dest_addr; /* The canonical address of `dest'. */
387 } modify_pair;
389 DEF_VEC_O(modify_pair);
390 DEF_VEC_ALLOC_O(modify_pair,heap);
392 /* This array parallels modify_mem_list, except that it stores MEMs
393 being set and their canonicalized memory addresses. */
394 static VEC (modify_pair,heap) **canon_modify_mem_list;
396 /* Bitmap indexed by block numbers to record which blocks contain
397 function calls. */
398 static bitmap blocks_with_calls;
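/* Sketch of how compute_transp below combines these: every block recorded
   in BLOCKS_WITH_CALLS kills any non-read-only MEM outright, and the
   remaining blocks in MODIFY_MEM_LIST_SET are then checked store by store
   against CANON_MODIFY_MEM_LIST, e.g.

     EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
       RESET_BIT (bmap[bb_index], indx);  */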
400 /* Various variables for statistics gathering. */
402 /* Memory used in a pass.
403 This isn't intended to be absolutely precise. Its intent is only
404 to keep an eye on memory usage. */
405 static int bytes_used;
407 /* GCSE substitutions made. */
408 static int gcse_subst_count;
409 /* Number of copy instructions created. */
410 static int gcse_create_count;
412 /* Doing code hoisting. */
413 static bool doing_code_hoisting_p = false;
415 /* For available exprs */
416 static sbitmap *ae_kill;
418 static void compute_can_copy (void);
419 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
420 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
421 static void *gcse_alloc (unsigned long);
422 static void alloc_gcse_mem (void);
423 static void free_gcse_mem (void);
424 static void hash_scan_insn (rtx, struct hash_table_d *);
425 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
426 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
427 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
428 static int want_to_gcse_p (rtx, int *);
429 static int oprs_unchanged_p (const_rtx, const_rtx, int);
430 static int oprs_anticipatable_p (const_rtx, const_rtx);
431 static int oprs_available_p (const_rtx, const_rtx);
432 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
433 struct hash_table_d *);
434 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
435 static int expr_equiv_p (const_rtx, const_rtx);
436 static void record_last_reg_set_info (rtx, int);
437 static void record_last_mem_set_info (rtx);
438 static void record_last_set_info (rtx, const_rtx, void *);
439 static void compute_hash_table (struct hash_table_d *);
440 static void alloc_hash_table (struct hash_table_d *);
441 static void free_hash_table (struct hash_table_d *);
442 static void compute_hash_table_work (struct hash_table_d *);
443 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
444 static void compute_transp (const_rtx, int, sbitmap *);
445 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
446 struct hash_table_d *);
447 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
448 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
449 static void canon_list_insert (rtx, const_rtx, void *);
450 static void alloc_pre_mem (int, int);
451 static void free_pre_mem (void);
452 static struct edge_list *compute_pre_data (void);
453 static int pre_expr_reaches_here_p (basic_block, struct expr *,
454 basic_block);
455 static void insert_insn_end_basic_block (struct expr *, basic_block);
456 static void pre_insert_copy_insn (struct expr *, rtx);
457 static void pre_insert_copies (void);
458 static int pre_delete (void);
459 static int pre_gcse (struct edge_list *);
460 static int one_pre_gcse_pass (void);
461 static void add_label_notes (rtx, rtx);
462 static void alloc_code_hoist_mem (int, int);
463 static void free_code_hoist_mem (void);
464 static void compute_code_hoist_vbeinout (void);
465 static void compute_code_hoist_data (void);
466 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
467 int, int *);
468 static int hoist_code (void);
469 static int one_code_hoisting_pass (void);
470 static rtx process_insert_insn (struct expr *);
471 static int pre_edge_insert (struct edge_list *, struct expr **);
472 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
473 basic_block, char *);
474 static struct ls_expr * ldst_entry (rtx);
475 static void free_ldst_entry (struct ls_expr *);
476 static void free_ld_motion_mems (void);
477 static void print_ldst_list (FILE *);
478 static struct ls_expr * find_rtx_in_ldst (rtx);
479 static int simple_mem (const_rtx);
480 static void invalidate_any_buried_refs (rtx);
481 static void compute_ld_motion_mems (void);
482 static void trim_ld_motion_mems (void);
483 static void update_ld_motion_stores (struct expr *);
484 static void clear_modify_mem_tables (void);
485 static void free_modify_mem_tables (void);
486 static rtx gcse_emit_move_after (rtx, rtx, rtx);
487 static bool is_too_expensive (const char *);
488 static unsigned find_loop_lsm_limit (struct loop*);
489 static void adjust_loop_lsm_limit (struct loop*, unsigned);
490 static void init_loop_lsm_limit (void);
491 static void fini_loop_lsm_limit (void);
493 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
494 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
496 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
497 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
499 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
500 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
502 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
503 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
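/* Usage sketch: these wrappers behave like xmalloc/xcalloc/obstack_alloc
   but also add the requested size to the BYTES_USED statistic, e.g.

     struct expr **slots = GNEWVEC (struct expr *, n_buckets);
     struct occr *occr = GOBNEW (struct occr);

   (SLOTS and N_BUCKETS are just names made up for this example.)  */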
505 /* Misc. utilities. */
507 #define can_copy \
508 (this_target_gcse->x_can_copy)
509 #define can_copy_init_p \
510 (this_target_gcse->x_can_copy_init_p)
512 /* Compute which modes support reg/reg copy operations. */
514 static void
515 compute_can_copy (void)
517 int i;
518 #ifndef AVOID_CCMODE_COPIES
519 rtx reg, insn;
520 #endif
521 memset (can_copy, 0, NUM_MACHINE_MODES);
523 start_sequence ();
524 for (i = 0; i < NUM_MACHINE_MODES; i++)
525 if (GET_MODE_CLASS (i) == MODE_CC)
527 #ifdef AVOID_CCMODE_COPIES
528 can_copy[i] = 0;
529 #else
530 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
531 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
532 if (recog (PATTERN (insn), insn, NULL) >= 0)
533 can_copy[i] = 1;
534 #endif
536 else
537 can_copy[i] = 1;
539 end_sequence ();
542 /* Returns whether the mode supports reg/reg copy operations. */
544 bool
545 can_copy_p (enum machine_mode mode)
547 if (! can_copy_init_p)
549 compute_can_copy ();
550 can_copy_init_p = true;
553 return can_copy[mode] != 0;
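/* Example use, mirroring hash_scan_set below: a SET is only considered for
   GCSE if its destination mode supports reg/reg copies (sketch):

     if (regno >= FIRST_PSEUDO_REGISTER
         && can_copy_p (GET_MODE (dest)))
       ... consider the SET for the expression hash table ...  */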
556 /* Cover function to xmalloc to record bytes allocated. */
558 static void *
559 gmalloc (size_t size)
561 bytes_used += size;
562 return xmalloc (size);
565 /* Cover function to xcalloc to record bytes allocated. */
567 static void *
568 gcalloc (size_t nelem, size_t elsize)
570 bytes_used += nelem * elsize;
571 return xcalloc (nelem, elsize);
574 /* Cover function to obstack_alloc. */
576 static void *
577 gcse_alloc (unsigned long size)
579 bytes_used += size;
580 return obstack_alloc (&gcse_obstack, size);
583 /* Allocate memory for the reg/memory set tracking tables.
584 This is called at the start of each pass. */
586 static void
587 alloc_gcse_mem (void)
589 /* Allocate vars to track sets of regs. */
590 reg_set_bitmap = ALLOC_REG_SET (NULL);
592 /* Allocate array to keep a list of insns which modify memory in each
593 basic block. */
594 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
595 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
596 last_basic_block);
597 modify_mem_list_set = BITMAP_ALLOC (NULL);
598 blocks_with_calls = BITMAP_ALLOC (NULL);
601 /* Free memory allocated by alloc_gcse_mem. */
603 static void
604 free_gcse_mem (void)
606 FREE_REG_SET (reg_set_bitmap);
608 free_modify_mem_tables ();
609 BITMAP_FREE (modify_mem_list_set);
610 BITMAP_FREE (blocks_with_calls);
613 /* Compute the local properties of each recorded expression.
615 Local properties are those that are defined by the block, irrespective of
616 other blocks.
618 An expression is transparent in a block if its operands are not modified
619 in the block.
   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for PRE and code hoisting; both compute basically
   the same information and thus can easily share this code.
632 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
633 properties. If NULL, then it is not necessary to compute or record that
634 particular property.
636 TABLE controls which hash table to look at. */
638 static void
639 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
640 struct hash_table_d *table)
642 unsigned int i;
644 /* Initialize any bitmaps that were passed in. */
645 if (transp)
647 sbitmap_vector_ones (transp, last_basic_block);
650 if (comp)
651 sbitmap_vector_zero (comp, last_basic_block);
652 if (antloc)
653 sbitmap_vector_zero (antloc, last_basic_block);
655 for (i = 0; i < table->size; i++)
657 struct expr *expr;
659 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
661 int indx = expr->bitmap_index;
662 struct occr *occr;
664 /* The expression is transparent in this block if it is not killed.
665 We start by assuming all are transparent [none are killed], and
666 then reset the bits for those that are. */
667 if (transp)
668 compute_transp (expr->expr, indx, transp);
670 /* The occurrences recorded in antic_occr are exactly those that
671 we want to set to nonzero in ANTLOC. */
672 if (antloc)
673 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
675 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
677 /* While we're scanning the table, this is a good place to
678 initialize this. */
679 occr->deleted_p = 0;
682 /* The occurrences recorded in avail_occr are exactly those that
683 we want to set to nonzero in COMP. */
684 if (comp)
685 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
687 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
689 /* While we're scanning the table, this is a good place to
690 initialize this. */
691 occr->copied_p = 0;
694 /* While we're scanning the table, this is a good place to
695 initialize this. */
696 expr->reaching_reg = 0;
701 /* Hash table support. */
struct reg_avail_info
{
705 basic_block last_bb;
706 int first_set;
  int last_set;
};
710 static struct reg_avail_info *reg_avail_info;
711 static basic_block current_bb;
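/* Sketch of how oprs_unchanged_p below consults these records for a
   register X and an insn INSN:

     info = &reg_avail_info[REGNO (x)];
     if (info->last_bb != current_bb)
       ... X is not set in this block at all, so it is unchanged ...
     else if (avail_p)
       ... unchanged iff info->last_set < DF_INSN_LUID (insn) ...
     else
       ... unchanged iff info->first_set >= DF_INSN_LUID (insn) ...  */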
713 /* See whether X, the source of a set, is something we want to consider for
714 GCSE. */
716 static int
717 want_to_gcse_p (rtx x, int *max_distance_ptr)
719 #ifdef STACK_REGS
720 /* On register stack architectures, don't GCSE constants from the
721 constant pool, as the benefits are often swamped by the overhead
722 of shuffling the register stack between basic blocks. */
723 if (IS_STACK_MODE (GET_MODE (x)))
724 x = avoid_constant_pool_reference (x);
725 #endif
727 /* GCSE'ing constants:
729 We do not specifically distinguish between constant and non-constant
730 expressions in PRE and Hoist. We use set_src_cost below to limit
731 the maximum distance simple expressions can travel.
     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     hoisting of constants leads to increased register pressure.

     RA can deal with this by rematerializing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload to rematerialize them under high register
     pressure, i.e., a pseudo register with a REG_EQUAL note for the constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */
745 switch (GET_CODE (x))
747 case REG:
748 case SUBREG:
749 case CALL:
750 return 0;
752 case CONST_INT:
753 case CONST_DOUBLE:
754 case CONST_FIXED:
755 case CONST_VECTOR:
756 if (!doing_code_hoisting_p)
757 /* Do not PRE constants. */
758 return 0;
760 /* FALLTHRU */
762 default:
763 if (doing_code_hoisting_p)
764 /* PRE doesn't implement max_distance restriction. */
766 int cost;
767 int max_distance;
769 gcc_assert (!optimize_function_for_speed_p (cfun)
770 && optimize_function_for_size_p (cfun));
771 cost = set_src_cost (x, 0);
773 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
775 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
776 if (max_distance == 0)
777 return 0;
779 gcc_assert (max_distance > 0);
781 else
782 max_distance = 0;
784 if (max_distance_ptr)
785 *max_distance_ptr = max_distance;
788 return can_assign_to_reg_without_clobbers_p (x);
792 /* Used internally by can_assign_to_reg_without_clobbers_p. */
794 static GTY(()) rtx test_insn;
796 /* Return true if we can assign X to a pseudo register such that the
797 resulting insn does not result in clobbering a hard register as a
798 side-effect.
800 Additionally, if the target requires it, check that the resulting insn
801 can be copied. If it cannot, this means that X is special and probably
802 has hidden side-effects we don't want to mess with.
804 This function is typically used by code motion passes, to verify
805 that it is safe to insert an insn without worrying about clobbering
806 maybe live hard regs. */
808 bool
809 can_assign_to_reg_without_clobbers_p (rtx x)
811 int num_clobbers = 0;
812 int icode;
814 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
815 if (general_operand (x, GET_MODE (x)))
816 return 1;
817 else if (GET_MODE (x) == VOIDmode)
818 return 0;
820 /* Otherwise, check if we can make a valid insn from it. First initialize
821 our test insn if we haven't already. */
822 if (test_insn == 0)
824 test_insn
825 = make_insn_raw (gen_rtx_SET (VOIDmode,
826 gen_rtx_REG (word_mode,
827 FIRST_PSEUDO_REGISTER * 2),
828 const0_rtx));
829 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
832 /* Now make an insn like the one we would make when GCSE'ing and see if
833 valid. */
834 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
835 SET_SRC (PATTERN (test_insn)) = x;
837 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
838 if (icode < 0)
839 return false;
841 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
842 return false;
844 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
845 return false;
847 return true;
850 /* Return nonzero if the operands of expression X are unchanged from the
851 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
852 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
854 static int
855 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
857 int i, j;
858 enum rtx_code code;
859 const char *fmt;
861 if (x == 0)
862 return 1;
864 code = GET_CODE (x);
865 switch (code)
867 case REG:
869 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
871 if (info->last_bb != current_bb)
872 return 1;
873 if (avail_p)
874 return info->last_set < DF_INSN_LUID (insn);
875 else
876 return info->first_set >= DF_INSN_LUID (insn);
879 case MEM:
880 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
881 x, avail_p))
882 return 0;
883 else
884 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
886 case PRE_DEC:
887 case PRE_INC:
888 case POST_DEC:
889 case POST_INC:
890 case PRE_MODIFY:
891 case POST_MODIFY:
892 return 0;
894 case PC:
895 case CC0: /*FIXME*/
896 case CONST:
897 case CONST_INT:
898 case CONST_DOUBLE:
899 case CONST_FIXED:
900 case CONST_VECTOR:
901 case SYMBOL_REF:
902 case LABEL_REF:
903 case ADDR_VEC:
904 case ADDR_DIFF_VEC:
905 return 1;
907 default:
908 break;
911 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
913 if (fmt[i] == 'e')
915 /* If we are about to do the last recursive call needed at this
916 level, change it into iteration. This function is called enough
917 to be worth it. */
918 if (i == 0)
919 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
921 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
922 return 0;
924 else if (fmt[i] == 'E')
925 for (j = 0; j < XVECLEN (x, i); j++)
926 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
927 return 0;
930 return 1;
933 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
struct mem_conflict_info
{
  /* A memory reference for a load instruction; mems_conflict_for_gcse_p
     will see if a memory store conflicts with this memory load.  */
939 const_rtx mem;
941 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
942 references. */
  bool conflict;
};
946 /* DEST is the output of an instruction. If it is a memory reference and
947 possibly conflicts with the load found in DATA, then communicate this
948 information back through DATA. */
950 static void
951 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
952 void *data)
954 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
956 while (GET_CODE (dest) == SUBREG
957 || GET_CODE (dest) == ZERO_EXTRACT
958 || GET_CODE (dest) == STRICT_LOW_PART)
959 dest = XEXP (dest, 0);
961 /* If DEST is not a MEM, then it will not conflict with the load. Note
962 that function calls are assumed to clobber memory, but are handled
963 elsewhere. */
964 if (! MEM_P (dest))
965 return;
967 /* If we are setting a MEM in our list of specially recognized MEMs,
968 don't mark as killed this time. */
969 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
971 if (!find_rtx_in_ldst (dest))
972 mci->conflict = true;
973 return;
976 if (true_dependence (dest, GET_MODE (dest), mci->mem))
977 mci->conflict = true;
980 /* Return nonzero if the expression in X (a memory reference) is killed
981 in block BB before or after the insn with the LUID in UID_LIMIT.
982 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
983 before UID_LIMIT.
985 To check the entire block, set UID_LIMIT to max_uid + 1 and
986 AVAIL_P to 0. */
988 static int
989 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
990 int avail_p)
992 VEC (rtx,heap) *list = modify_mem_list[bb->index];
993 rtx setter;
994 unsigned ix;
  /* If this is a read-only MEM, then we aren't going to be changing it.  */
997 if (MEM_READONLY_P (x))
998 return 0;
1000 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
1002 struct mem_conflict_info mci;
1004 /* Ignore entries in the list that do not apply. */
1005 if ((avail_p
1006 && DF_INSN_LUID (setter) < uid_limit)
1007 || (! avail_p
1008 && DF_INSN_LUID (setter) > uid_limit))
1009 continue;
1011 /* If SETTER is a call everything is clobbered. Note that calls
1012 to pure functions are never put on the list, so we need not
1013 worry about them. */
1014 if (CALL_P (setter))
1015 return 1;
1017 /* SETTER must be an INSN of some kind that sets memory. Call
1018 note_stores to examine each hunk of memory that is modified. */
1019 mci.mem = x;
1020 mci.conflict = false;
1021 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1022 if (mci.conflict)
1023 return 1;
1025 return 0;
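/* For example, following the comment above, a caller can ask whether the
   load X may be killed anywhere in BB with (sketch):

     if (load_killed_in_block_p (bb, get_max_uid () + 1, x, 0))
       ... X is not transparent in BB ...  */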
1028 /* Return nonzero if the operands of expression X are unchanged from
1029 the start of INSN's basic block up to but not including INSN. */
1031 static int
1032 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1034 return oprs_unchanged_p (x, insn, 0);
1037 /* Return nonzero if the operands of expression X are unchanged from
1038 INSN to the end of INSN's basic block. */
1040 static int
1041 oprs_available_p (const_rtx x, const_rtx insn)
1043 return oprs_unchanged_p (x, insn, 1);
1046 /* Hash expression X.
1048 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1049 indicating if a volatile operand is found or if the expression contains
1050 something we don't want to insert in the table. HASH_TABLE_SIZE is
1051 the current size of the hash table to be probed. */
1053 static unsigned int
1054 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1055 int hash_table_size)
1057 unsigned int hash;
1059 *do_not_record_p = 0;
1061 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1062 return hash % hash_table_size;
1065 /* Return nonzero if exp1 is equivalent to exp2. */
1067 static int
1068 expr_equiv_p (const_rtx x, const_rtx y)
1070 return exp_equiv_p (x, y, 0, true);
1073 /* Insert expression X in INSN in the hash TABLE.
1074 If it is already present, record it as the last occurrence in INSN's
1075 basic block.
1077 MODE is the mode of the value X is being stored into.
1078 It is only used if X is a CONST_INT.
1080 ANTIC_P is nonzero if X is an anticipatable expression.
1081 AVAIL_P is nonzero if X is an available expression.
1083 MAX_DISTANCE is the maximum distance in instructions this expression can
1084 be moved. */
1086 static void
1087 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1088 int avail_p, int max_distance, struct hash_table_d *table)
1090 int found, do_not_record_p;
1091 unsigned int hash;
1092 struct expr *cur_expr, *last_expr = NULL;
1093 struct occr *antic_occr, *avail_occr;
1095 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1097 /* Do not insert expression in table if it contains volatile operands,
1098 or if hash_expr determines the expression is something we don't want
1099 to or can't handle. */
1100 if (do_not_record_p)
1101 return;
1103 cur_expr = table->table[hash];
1104 found = 0;
1106 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1108 /* If the expression isn't found, save a pointer to the end of
1109 the list. */
1110 last_expr = cur_expr;
1111 cur_expr = cur_expr->next_same_hash;
1114 if (! found)
1116 cur_expr = GOBNEW (struct expr);
1117 bytes_used += sizeof (struct expr);
1118 if (table->table[hash] == NULL)
1119 /* This is the first pattern that hashed to this index. */
1120 table->table[hash] = cur_expr;
1121 else
1122 /* Add EXPR to end of this hash chain. */
1123 last_expr->next_same_hash = cur_expr;
1125 /* Set the fields of the expr element. */
1126 cur_expr->expr = x;
1127 cur_expr->bitmap_index = table->n_elems++;
1128 cur_expr->next_same_hash = NULL;
1129 cur_expr->antic_occr = NULL;
1130 cur_expr->avail_occr = NULL;
1131 gcc_assert (max_distance >= 0);
1132 cur_expr->max_distance = max_distance;
1134 else
1135 gcc_assert (cur_expr->max_distance == max_distance);
1137 /* Now record the occurrence(s). */
1138 if (antic_p)
1140 antic_occr = cur_expr->antic_occr;
1142 if (antic_occr
1143 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1144 antic_occr = NULL;
1146 if (antic_occr)
1147 /* Found another instance of the expression in the same basic block.
1148 Prefer the currently recorded one. We want the first one in the
1149 block and the block is scanned from start to end. */
1150 ; /* nothing to do */
1151 else
1153 /* First occurrence of this expression in this basic block. */
1154 antic_occr = GOBNEW (struct occr);
1155 bytes_used += sizeof (struct occr);
1156 antic_occr->insn = insn;
1157 antic_occr->next = cur_expr->antic_occr;
1158 antic_occr->deleted_p = 0;
1159 cur_expr->antic_occr = antic_occr;
1163 if (avail_p)
1165 avail_occr = cur_expr->avail_occr;
1167 if (avail_occr
1168 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1170 /* Found another instance of the expression in the same basic block.
1171 Prefer this occurrence to the currently recorded one. We want
1172 the last one in the block and the block is scanned from start
1173 to end. */
1174 avail_occr->insn = insn;
1176 else
1178 /* First occurrence of this expression in this basic block. */
1179 avail_occr = GOBNEW (struct occr);
1180 bytes_used += sizeof (struct occr);
1181 avail_occr->insn = insn;
1182 avail_occr->next = cur_expr->avail_occr;
1183 avail_occr->deleted_p = 0;
1184 cur_expr->avail_occr = avail_occr;
1189 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1191 static void
1192 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1194 rtx src = SET_SRC (set);
1195 rtx dest = SET_DEST (set);
1196 rtx note;
1198 if (GET_CODE (src) == CALL)
1199 hash_scan_call (src, insn, table);
1201 else if (REG_P (dest))
1203 unsigned int regno = REGNO (dest);
1204 int max_distance = 0;
1206 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1208 This allows us to do a single GCSE pass and still eliminate
1209 redundant constants, addresses or other expressions that are
1210 constructed with multiple instructions.
1212 However, keep the original SRC if INSN is a simple reg-reg move.
1213 In this case, there will almost always be a REG_EQUAL note on the
1214 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1215 for INSN, we miss copy propagation opportunities and we perform the
1216 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1217 do more than one PRE GCSE pass.
1219 Note that this does not impede profitable constant propagations. We
1220 "look through" reg-reg sets in lookup_avail_set. */
1221 note = find_reg_equal_equiv_note (insn);
1222 if (note != 0
1223 && REG_NOTE_KIND (note) == REG_EQUAL
1224 && !REG_P (src)
1225 && want_to_gcse_p (XEXP (note, 0), NULL))
1226 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1228 /* Only record sets of pseudo-regs in the hash table. */
1229 if (regno >= FIRST_PSEUDO_REGISTER
1230 /* Don't GCSE something if we can't do a reg/reg copy. */
1231 && can_copy_p (GET_MODE (dest))
      /* GCSE commonly inserts instructions after the insn.  We can't
         do that easily for EH edges, so disable GCSE on these for now.  */
1234 /* ??? We can now easily create new EH landing pads at the
1235 gimple level, for splitting edges; there's no reason we
1236 can't do the same thing at the rtl level. */
1237 && !can_throw_internal (insn)
1238 /* Is SET_SRC something we want to gcse? */
1239 && want_to_gcse_p (src, &max_distance)
1240 /* Don't CSE a nop. */
1241 && ! set_noop_p (set)
      /* Don't GCSE if it has an attached REG_EQUIV note.
         At this point, only function parameters should have
         REG_EQUIV notes, and if the argument slot is used somewhere
         explicitly, it means the address of the parameter has been taken,
         so we should not extend the lifetime of the pseudo.  */
1247 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1249 /* An expression is not anticipatable if its operands are
1250 modified before this insn or if this is not the only SET in
1251 this insn. The latter condition does not have to mean that
1252 SRC itself is not anticipatable, but we just will not be
1253 able to handle code motion of insns with multiple sets. */
1254 int antic_p = oprs_anticipatable_p (src, insn)
1255 && !multiple_sets (insn);
1256 /* An expression is not available if its operands are
1257 subsequently modified, including this insn. It's also not
1258 available if this is a branch, because we can't insert
1259 a set after the branch. */
1260 int avail_p = (oprs_available_p (src, insn)
1261 && ! JUMP_P (insn));
1263 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1264 max_distance, table);
  /* In the case of a store, we want to consider the memory value as available
     in the REG stored in that memory.  This makes it possible to remove
     redundant loads due to stores to the same location.  */
1270 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1272 unsigned int regno = REGNO (src);
1273 int max_distance = 0;
1275 /* Only record sets of pseudo-regs in the hash table. */
1276 if (regno >= FIRST_PSEUDO_REGISTER
1277 /* Don't GCSE something if we can't do a reg/reg copy. */
1278 && can_copy_p (GET_MODE (src))
      /* GCSE commonly inserts instructions after the insn.  We can't
         do that easily for EH edges, so disable GCSE on these for now.  */
1281 && !can_throw_internal (insn)
1282 /* Is SET_DEST something we want to gcse? */
1283 && want_to_gcse_p (dest, &max_distance)
1284 /* Don't CSE a nop. */
1285 && ! set_noop_p (set)
      /* Don't GCSE if it has an attached REG_EQUIV note.
         At this point, only function parameters should have
         REG_EQUIV notes, and if the argument slot is used somewhere
         explicitly, it means the address of the parameter has been taken,
         so we should not extend the lifetime of the pseudo.  */
1291 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1292 || ! MEM_P (XEXP (note, 0))))
1294 /* Stores are never anticipatable. */
1295 int antic_p = 0;
1296 /* An expression is not available if its operands are
1297 subsequently modified, including this insn. It's also not
1298 available if this is a branch, because we can't insert
1299 a set after the branch. */
1300 int avail_p = oprs_available_p (dest, insn)
1301 && ! JUMP_P (insn);
1303 /* Record the memory expression (DEST) in the hash table. */
1304 insert_expr_in_table (dest, GET_MODE (dest), insn,
1305 antic_p, avail_p, max_distance, table);
1310 static void
1311 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1312 struct hash_table_d *table ATTRIBUTE_UNUSED)
1314 /* Currently nothing to do. */
1317 static void
1318 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1319 struct hash_table_d *table ATTRIBUTE_UNUSED)
1321 /* Currently nothing to do. */
1324 /* Process INSN and add hash table entries as appropriate. */
1326 static void
1327 hash_scan_insn (rtx insn, struct hash_table_d *table)
1329 rtx pat = PATTERN (insn);
1330 int i;
1332 /* Pick out the sets of INSN and for other forms of instructions record
1333 what's been modified. */
1335 if (GET_CODE (pat) == SET)
1336 hash_scan_set (pat, insn, table);
1338 else if (GET_CODE (pat) == CLOBBER)
1339 hash_scan_clobber (pat, insn, table);
1341 else if (GET_CODE (pat) == CALL)
1342 hash_scan_call (pat, insn, table);
1344 else if (GET_CODE (pat) == PARALLEL)
1345 for (i = 0; i < XVECLEN (pat, 0); i++)
1347 rtx x = XVECEXP (pat, 0, i);
1349 if (GET_CODE (x) == SET)
1350 hash_scan_set (x, insn, table);
1351 else if (GET_CODE (x) == CLOBBER)
1352 hash_scan_clobber (x, insn, table);
1353 else if (GET_CODE (x) == CALL)
1354 hash_scan_call (x, insn, table);
1358 /* Dump the hash table TABLE to file FILE under the name NAME. */
1360 static void
1361 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1363 int i;
1364 /* Flattened out table, so it's printed in proper order. */
1365 struct expr **flat_table;
1366 unsigned int *hash_val;
1367 struct expr *expr;
1369 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1370 hash_val = XNEWVEC (unsigned int, table->n_elems);
1372 for (i = 0; i < (int) table->size; i++)
1373 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1375 flat_table[expr->bitmap_index] = expr;
1376 hash_val[expr->bitmap_index] = i;
1379 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1380 name, table->size, table->n_elems);
1382 for (i = 0; i < (int) table->n_elems; i++)
1383 if (flat_table[i] != 0)
1385 expr = flat_table[i];
1386 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1387 expr->bitmap_index, hash_val[i], expr->max_distance);
1388 print_rtl (file, expr->expr);
1389 fprintf (file, "\n");
1392 fprintf (file, "\n");
1394 free (flat_table);
1395 free (hash_val);
1398 /* Record register first/last/block set information for REGNO in INSN.
1400 first_set records the first place in the block where the register
1401 is set and is used to compute "anticipatability".
1403 last_set records the last place in the block where the register
1404 is set and is used to compute "availability".
1406 last_bb records the block for which first_set and last_set are
1407 valid, as a quick test to invalidate them. */
1409 static void
1410 record_last_reg_set_info (rtx insn, int regno)
1412 struct reg_avail_info *info = &reg_avail_info[regno];
1413 int luid = DF_INSN_LUID (insn);
1415 info->last_set = luid;
1416 if (info->last_bb != current_bb)
1418 info->last_bb = current_bb;
1419 info->first_set = luid;
/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
   Each entry records the MEM being set together with its canonicalized
   address, as a modify_pair.  */
1427 static void
1428 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1429 void * v_insn)
1431 rtx dest_addr, insn;
1432 int bb;
1433 modify_pair *pair;
1435 while (GET_CODE (dest) == SUBREG
1436 || GET_CODE (dest) == ZERO_EXTRACT
1437 || GET_CODE (dest) == STRICT_LOW_PART)
1438 dest = XEXP (dest, 0);
1440 /* If DEST is not a MEM, then it will not conflict with a load. Note
1441 that function calls are assumed to clobber memory, but are handled
1442 elsewhere. */
1444 if (! MEM_P (dest))
1445 return;
1447 dest_addr = get_addr (XEXP (dest, 0));
1448 dest_addr = canon_rtx (dest_addr);
1449 insn = (rtx) v_insn;
1450 bb = BLOCK_FOR_INSN (insn)->index;
1452 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1453 pair->dest = dest;
1454 pair->dest_addr = dest_addr;
1457 /* Record memory modification information for INSN. We do not actually care
1458 about the memory location(s) that are set, or even how they are set (consider
1459 a CALL_INSN). We merely need to record which insns modify memory. */
1461 static void
1462 record_last_mem_set_info (rtx insn)
1464 int bb = BLOCK_FOR_INSN (insn)->index;
1466 /* load_killed_in_block_p will handle the case of calls clobbering
1467 everything. */
1468 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1469 bitmap_set_bit (modify_mem_list_set, bb);
1471 if (CALL_P (insn))
1472 bitmap_set_bit (blocks_with_calls, bb);
1473 else
1474 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1477 /* Called from compute_hash_table via note_stores to handle one
1478 SET or CLOBBER in an insn. DATA is really the instruction in which
1479 the SET is taking place. */
1481 static void
1482 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1484 rtx last_set_insn = (rtx) data;
1486 if (GET_CODE (dest) == SUBREG)
1487 dest = SUBREG_REG (dest);
1489 if (REG_P (dest))
1490 record_last_reg_set_info (last_set_insn, REGNO (dest));
1491 else if (MEM_P (dest)
1492 /* Ignore pushes, they clobber nothing. */
1493 && ! push_operand (dest, GET_MODE (dest)))
1494 record_last_mem_set_info (last_set_insn);
1497 /* Top level function to create an expression hash table.
1499 Expression entries are placed in the hash table if
1500 - they are of the form (set (pseudo-reg) src),
1501 - src is something we want to perform GCSE on,
1502 - none of the operands are subsequently modified in the block
1504 Currently src must be a pseudo-reg or a const_int.
1506 TABLE is the table computed. */
1508 static void
1509 compute_hash_table_work (struct hash_table_d *table)
1511 int i;
  /* Clear out the memory modification tables left over from any previous pass.  */
1514 clear_modify_mem_tables ();
1515 /* Some working arrays used to track first and last set in each block. */
1516 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1518 for (i = 0; i < max_reg_num (); ++i)
1519 reg_avail_info[i].last_bb = NULL;
1521 FOR_EACH_BB (current_bb)
1523 rtx insn;
1524 unsigned int regno;
1526 /* First pass over the instructions records information used to
1527 determine when registers and memory are first and last set. */
1528 FOR_BB_INSNS (current_bb, insn)
1530 if (!NONDEBUG_INSN_P (insn))
1531 continue;
1533 if (CALL_P (insn))
1535 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1536 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1537 record_last_reg_set_info (insn, regno);
1539 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1540 record_last_mem_set_info (insn);
1543 note_stores (PATTERN (insn), record_last_set_info, insn);
1546 /* The next pass builds the hash table. */
1547 FOR_BB_INSNS (current_bb, insn)
1548 if (NONDEBUG_INSN_P (insn))
1549 hash_scan_insn (insn, table);
1552 free (reg_avail_info);
1553 reg_avail_info = NULL;
/* Allocate space for the set/expr hash TABLE.
   The number of insns in the function is used to determine the number
   of buckets to use.  */
1559 static void
1560 alloc_hash_table (struct hash_table_d *table)
1562 int n;
1564 n = get_max_insn_count ();
1566 table->size = n / 4;
1567 if (table->size < 11)
1568 table->size = 11;
1570 /* Attempt to maintain efficient use of hash table.
1571 Making it an odd number is simplest for now.
1572 ??? Later take some measurements. */
1573 table->size |= 1;
1574 n = table->size * sizeof (struct expr *);
1575 table->table = GNEWVAR (struct expr *, n);
1578 /* Free things allocated by alloc_hash_table. */
1580 static void
1581 free_hash_table (struct hash_table_d *table)
1583 free (table->table);
1586 /* Compute the expression hash table TABLE. */
1588 static void
1589 compute_hash_table (struct hash_table_d *table)
1591 /* Initialize count of number of entries in hash table. */
1592 table->n_elems = 0;
1593 memset (table->table, 0, table->size * sizeof (struct expr *));
1595 compute_hash_table_work (table);
1598 /* Expression tracking support. */
1600 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1601 static void
1602 clear_modify_mem_tables (void)
1604 unsigned i;
1605 bitmap_iterator bi;
1607 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1609 VEC_free (rtx, heap, modify_mem_list[i]);
1610 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1612 bitmap_clear (modify_mem_list_set);
1613 bitmap_clear (blocks_with_calls);
1616 /* Release memory used by modify_mem_list_set. */
1618 static void
1619 free_modify_mem_tables (void)
1621 clear_modify_mem_tables ();
1622 free (modify_mem_list);
1623 free (canon_modify_mem_list);
1624 modify_mem_list = 0;
1625 canon_modify_mem_list = 0;
1628 /* For each block, compute whether X is transparent. X is either an
1629 expression or an assignment [though we don't care which, for this context
1630 an assignment is treated as an expression]. For each block where an
1631 element of X is modified, reset the INDX bit in BMAP. */
1633 static void
1634 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1636 int i, j;
1637 enum rtx_code code;
1638 const char *fmt;
1640 /* repeat is used to turn tail-recursion into iteration since GCC
1641 can't do it when there's no return value. */
1642 repeat:
1644 if (x == 0)
1645 return;
1647 code = GET_CODE (x);
1648 switch (code)
1650 case REG:
1652 df_ref def;
1653 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1654 def;
1655 def = DF_REF_NEXT_REG (def))
1656 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1659 return;
1661 case MEM:
1662 if (! MEM_READONLY_P (x))
1664 bitmap_iterator bi;
1665 unsigned bb_index;
1666 rtx x_addr;
1668 x_addr = get_addr (XEXP (x, 0));
1669 x_addr = canon_rtx (x_addr);
1671 /* First handle all the blocks with calls. We don't need to
1672 do any list walking for them. */
1673 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1675 RESET_BIT (bmap[bb_index], indx);
1678 /* Now iterate over the blocks which have memory modifications
1679 but which do not have any calls. */
1680 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1681 blocks_with_calls,
1682 0, bb_index, bi)
1684 VEC (modify_pair,heap) *list
1685 = canon_modify_mem_list[bb_index];
1686 modify_pair *pair;
1687 unsigned ix;
1689 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1691 rtx dest = pair->dest;
1692 rtx dest_addr = pair->dest_addr;
1694 if (canon_true_dependence (dest, GET_MODE (dest),
1695 dest_addr, x, x_addr))
1696 RESET_BIT (bmap[bb_index], indx);
1701 x = XEXP (x, 0);
1702 goto repeat;
1704 case PC:
1705 case CC0: /*FIXME*/
1706 case CONST:
1707 case CONST_INT:
1708 case CONST_DOUBLE:
1709 case CONST_FIXED:
1710 case CONST_VECTOR:
1711 case SYMBOL_REF:
1712 case LABEL_REF:
1713 case ADDR_VEC:
1714 case ADDR_DIFF_VEC:
1715 return;
1717 default:
1718 break;
1721 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1723 if (fmt[i] == 'e')
1725 /* If we are about to do the last recursive call
1726 needed at this level, change it into iteration.
1727 This function is called enough to be worth it. */
1728 if (i == 0)
1730 x = XEXP (x, i);
1731 goto repeat;
1734 compute_transp (XEXP (x, i), indx, bmap);
1736 else if (fmt[i] == 'E')
1737 for (j = 0; j < XVECLEN (x, i); j++)
1738 compute_transp (XVECEXP (x, i, j), indx, bmap);
1742 /* Compute PRE+LCM working variables. */
1744 /* Local properties of expressions. */
1746 /* Nonzero for expressions that are transparent in the block. */
1747 static sbitmap *transp;
1749 /* Nonzero for expressions that are computed (available) in the block. */
1750 static sbitmap *comp;
1752 /* Nonzero for expressions that are locally anticipatable in the block. */
1753 static sbitmap *antloc;
1755 /* Nonzero for expressions where this block is an optimal computation
1756 point. */
1757 static sbitmap *pre_optimal;
1759 /* Nonzero for expressions which are redundant in a particular block. */
1760 static sbitmap *pre_redundant;
1762 /* Nonzero for expressions which should be inserted on a specific edge. */
1763 static sbitmap *pre_insert_map;
1765 /* Nonzero for expressions which should be deleted in a specific block. */
1766 static sbitmap *pre_delete_map;
1768 /* Allocate vars used for PRE analysis. */
1770 static void
1771 alloc_pre_mem (int n_blocks, int n_exprs)
1773 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1774 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1775 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1777 pre_optimal = NULL;
1778 pre_redundant = NULL;
1779 pre_insert_map = NULL;
1780 pre_delete_map = NULL;
1781 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1783 /* pre_insert and pre_delete are allocated later. */
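/* Once allocated, the local property vectors are filled in from the
   expression hash table; a plausible first step of compute_pre_data
   (declared above) is, in sketch form:

     compute_local_properties (transp, comp, antloc, &expr_hash_table);  */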
1786 /* Free vars used for PRE analysis. */
1788 static void
1789 free_pre_mem (void)
1791 sbitmap_vector_free (transp);
1792 sbitmap_vector_free (comp);
1794 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1796 if (pre_optimal)
1797 sbitmap_vector_free (pre_optimal);
1798 if (pre_redundant)
1799 sbitmap_vector_free (pre_redundant);
1800 if (pre_insert_map)
1801 sbitmap_vector_free (pre_insert_map);
1802 if (pre_delete_map)
1803 sbitmap_vector_free (pre_delete_map);
1805 transp = comp = NULL;
1806 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1809 /* Remove certain expressions from anticipatable and transparent
   sets of basic blocks that have an incoming abnormal edge.
1811 For PRE remove potentially trapping expressions to avoid placing
1812 them on abnormal edges. For hoisting remove memory references that
1813 can be clobbered by calls. */
1815 static void
1816 prune_expressions (bool pre_p)
1818 sbitmap prune_exprs;
1819 struct expr *expr;
1820 unsigned int ui;
1821 basic_block bb;
1823 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1824 sbitmap_zero (prune_exprs);
1825 for (ui = 0; ui < expr_hash_table.size; ui++)
1827 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1829 /* Note potentially trapping expressions. */
1830 if (may_trap_p (expr->expr))
1832 SET_BIT (prune_exprs, expr->bitmap_index);
1833 continue;
1836 if (!pre_p && MEM_P (expr->expr))
1837 /* Note memory references that can be clobbered by a call.
1838 We do not split abnormal edges in hoisting, so should
1839 a memory reference be hoisted along an abnormal edge,
1840 it would be placed /before/ the call. Therefore, only
1841 constant memory references can be hoisted along abnormal
1842 edges. */
1844 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1845 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1846 continue;
1848 if (MEM_READONLY_P (expr->expr)
1849 && !MEM_VOLATILE_P (expr->expr)
1850 && MEM_NOTRAP_P (expr->expr))
1851 /* Constant memory reference, e.g., a PIC address. */
1852 continue;
1854 /* ??? Optimally, we would use interprocedural alias
1855 analysis to determine if this mem is actually killed
1856 by this call. */
1858 SET_BIT (prune_exprs, expr->bitmap_index);
1863 FOR_EACH_BB (bb)
1865 edge e;
1866 edge_iterator ei;
1868 /* If the current block is the destination of an abnormal edge, we
1869 kill all trapping (for PRE) and memory (for hoist) expressions
1870 because we won't be able to properly place the instruction on
1871 the edge. So make them neither anticipatable nor transparent.
1872 This is fairly conservative.
1874 ??? For hoisting it may be necessary to check for set-and-jump
1875 instructions here, not just for abnormal edges. The general problem
1876 is that when an expression cannot be placed right at the end of
1877 a basic block we should account for any side-effects of a subsequent
1878 jump instruction that could clobber the expression. It would
1879 be best to implement this check along the lines of
1880 hoist_expr_reaches_here_p where the target block is already known
1881 and, hence, there's no need to conservatively prune expressions on
1882 "intermediate" set-and-jump instructions. */
1883 FOR_EACH_EDGE (e, ei, bb->preds)
1884 if ((e->flags & EDGE_ABNORMAL)
1885 && (pre_p || CALL_P (BB_END (e->src))))
1887 sbitmap_difference (antloc[bb->index],
1888 antloc[bb->index], prune_exprs);
1889 sbitmap_difference (transp[bb->index],
1890 transp[bb->index], prune_exprs);
1891 break;
1895 sbitmap_free (prune_exprs);
1898 /* It may be necessary to insert a large number of insns on edges to
1899 make the existing occurrences of expressions fully redundant. This
1900 routine examines the set of insertions and deletions and if the ratio
1901 of insertions to deletions is too high for a particular expression, then
1902 the expression is removed from the insertion/deletion sets.
1904 N_ELEMS is the number of elements in the hash table. */
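/* As a rough illustration of the pruning done below (the numbers are only
   for the sake of example): if making the occurrences of an expression
   fully redundant would require inserting it on 30 edges while deleting
   only 2 existing evaluations, the insertion/deletion ratio is 15; once
   that ratio exceeds MAX_GCSE_INSERTION_RATIO the expression is dropped
   from both the insertion and the deletion maps.  */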
1906 static void
1907 prune_insertions_deletions (int n_elems)
1909 sbitmap_iterator sbi;
1910 sbitmap prune_exprs;
1912 /* We always use I to iterate over blocks/edges and J to iterate over
1913 expressions. */
1914 unsigned int i, j;
1916 /* Counts for the number of times an expression needs to be inserted and
1917 number of times an expression can be removed as a result. */
1918 int *insertions = GCNEWVEC (int, n_elems);
1919 int *deletions = GCNEWVEC (int, n_elems);
1921 /* Set of expressions which require too many insertions relative to
1922 the number of deletions achieved. We will prune these out of the
1923 insertion/deletion sets. */
1924 prune_exprs = sbitmap_alloc (n_elems);
1925 sbitmap_zero (prune_exprs);
1927 /* Iterate over the edges counting the number of times each expression
1928 needs to be inserted. */
1929 for (i = 0; i < (unsigned) n_edges; i++)
1931 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1932 insertions[j]++;
1935 /* Similarly for deletions, but those occur in blocks rather than on
1936 edges. */
1937 for (i = 0; i < (unsigned) last_basic_block; i++)
1939 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1940 deletions[j]++;
1943 /* Now that we have accurate counts, iterate over the elements in the
1944 hash table and see if any need too many insertions relative to the
1945 number of evaluations that can be removed. If so, mark them in
1946 PRUNE_EXPRS. */
1947 for (j = 0; j < (unsigned) n_elems; j++)
1948 if (deletions[j]
1949 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1950 SET_BIT (prune_exprs, j);
1952 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1953 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1955 for (i = 0; i < (unsigned) n_edges; i++)
1956 RESET_BIT (pre_insert_map[i], j);
1958 for (i = 0; i < (unsigned) last_basic_block; i++)
1959 RESET_BIT (pre_delete_map[i], j);
1962 sbitmap_free (prune_exprs);
1963 free (insertions);
1964 free (deletions);
1967 /* Top level routine to do the dataflow analysis needed by PRE. */
1969 static struct edge_list *
1970 compute_pre_data (void)
1972 struct edge_list *edge_list;
1973 basic_block bb;
1975 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1976 prune_expressions (true);
1977 sbitmap_vector_zero (ae_kill, last_basic_block);
1979 /* Compute ae_kill for each basic block using:
1981 ~(TRANSP | COMP)
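i.e. an expression is killed in BB when it is neither transparent
through BB nor computed (available) in BB.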
1984 FOR_EACH_BB (bb)
1986 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1987 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1990 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1991 ae_kill, &pre_insert_map, &pre_delete_map);
1992 sbitmap_vector_free (antloc);
1993 antloc = NULL;
1994 sbitmap_vector_free (ae_kill);
1995 ae_kill = NULL;
1997 prune_insertions_deletions (expr_hash_table.n_elems);
1999 return edge_list;
2002 /* PRE utilities */
2004 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2005 block BB.
2007 VISITED is a pointer to a working buffer for tracking which BB's have
2008 been visited. It is NULL for the top-level call.
2010 We treat reaching expressions that go through blocks containing the same
2011 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2012 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2013 2 as not reaching. The intent is to improve the probability of finding
2014 only one reaching expression and to reduce register lifetimes by picking
2015 the closest such expression. */
2017 static int
2018 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2019 basic_block bb, char *visited)
2021 edge pred;
2022 edge_iterator ei;
2024 FOR_EACH_EDGE (pred, ei, bb->preds)
2026 basic_block pred_bb = pred->src;
2028 if (pred->src == ENTRY_BLOCK_PTR
2029 /* Has this predecessor already been visited? */
2030 || visited[pred_bb->index])
2031 ;/* Nothing to do. */
2033 /* Does this predecessor generate this expression? */
2034 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2036 /* Is this the occurrence we're looking for?
2037 Note that there's only one generating occurrence per block
2038 so we just need to check the block number. */
2039 if (occr_bb == pred_bb)
2040 return 1;
2042 visited[pred_bb->index] = 1;
2044 /* Ignore this predecessor if it kills the expression. */
2045 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2046 visited[pred_bb->index] = 1;
2048 /* Neither gen nor kill. */
2049 else
2051 visited[pred_bb->index] = 1;
2052 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2053 return 1;
2057 /* All paths have been checked. */
2058 return 0;
2061 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
2062 memory allocated for that function is returned. */
2064 static int
2065 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2067 int rval;
2068 char *visited = XCNEWVEC (char, last_basic_block);
2070 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2072 free (visited);
2073 return rval;
2076 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2078 static rtx
2079 process_insert_insn (struct expr *expr)
2081 rtx reg = expr->reaching_reg;
2082 /* Copy the expression to make sure we don't have any sharing issues. */
2083 rtx exp = copy_rtx (expr->expr);
2084 rtx pat;
2086 start_sequence ();
2088 /* If the expression is something that's an operand, like a constant,
2089 just copy it to a register. */
2090 if (general_operand (exp, GET_MODE (reg)))
2091 emit_move_insn (reg, exp);
2093 /* Otherwise, make a new insn to compute this expression and make sure the
2094 insn will be recognized (this also adds any needed CLOBBERs). */
2095 else
2097 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2099 if (insn_invalid_p (insn))
2100 gcc_unreachable ();
2103 pat = get_insns ();
2104 end_sequence ();
2106 return pat;
2109 /* Add EXPR to the end of basic block BB.
2111 This is used by both the PRE and code hoisting. */
2113 static void
2114 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2116 rtx insn = BB_END (bb);
2117 rtx new_insn;
2118 rtx reg = expr->reaching_reg;
2119 int regno = REGNO (reg);
2120 rtx pat, pat_end;
2122 pat = process_insert_insn (expr);
2123 gcc_assert (pat && INSN_P (pat));
2125 pat_end = pat;
2126 while (NEXT_INSN (pat_end) != NULL_RTX)
2127 pat_end = NEXT_INSN (pat_end);
2129 /* If the last insn is a jump, insert EXPR in front [taking care to
2130 handle cc0, etc. properly]. Similarly we need to take care of trapping
2131 instructions in the presence of non-call exceptions. */
2133 if (JUMP_P (insn)
2134 || (NONJUMP_INSN_P (insn)
2135 && (!single_succ_p (bb)
2136 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2138 #ifdef HAVE_cc0
2139 rtx note;
2140 #endif
2142 /* If this is a jump table, then we can't insert stuff here. Since
2143 we know the previous real insn must be the tablejump, we insert
2144 the new instruction just before the tablejump. */
2145 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2146 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2147 insn = prev_active_insn (insn);
2149 #ifdef HAVE_cc0
2150 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2151 if cc0 isn't set. */
2152 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2153 if (note)
2154 insn = XEXP (note, 0);
2155 else
2157 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2158 if (maybe_cc0_setter
2159 && INSN_P (maybe_cc0_setter)
2160 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2161 insn = maybe_cc0_setter;
2163 #endif
2164 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2165 new_insn = emit_insn_before_noloc (pat, insn, bb);
2168 /* Likewise if the last insn is a call, as will happen in the presence
2169 of exception handling. */
2170 else if (CALL_P (insn)
2171 && (!single_succ_p (bb)
2172 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2174 /* Keeping in mind targets with small register classes and parameters
2175 in registers, we search backward and place the instructions before
2176 the first parameter is loaded. Do this in all cases for consistency
2177 and on the presumption that we'll get better code elsewhere as well. */
2179 /* Since different machines initialize their parameter registers
2180 in different orders, assume nothing. Collect the set of all
2181 parameter registers. */
2182 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2184 /* If we found all the parameter loads, then we want to insert
2185 before the first parameter load.
2187 If we did not find all the parameter loads, then we might have
2188 stopped on the head of the block, which could be a CODE_LABEL.
2189 If we inserted before the CODE_LABEL, then we would be putting
2190 the insn in the wrong basic block. In that case, put the insn
2191 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2192 while (LABEL_P (insn)
2193 || NOTE_INSN_BASIC_BLOCK_P (insn))
2194 insn = NEXT_INSN (insn);
2196 new_insn = emit_insn_before_noloc (pat, insn, bb);
2198 else
2199 new_insn = emit_insn_after_noloc (pat, insn, bb);
2201 while (1)
2203 if (INSN_P (pat))
2204 add_label_notes (PATTERN (pat), new_insn);
2205 if (pat == pat_end)
2206 break;
2207 pat = NEXT_INSN (pat);
2210 gcse_create_count++;
2212 if (dump_file)
2214 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2215 bb->index, INSN_UID (new_insn));
2216 fprintf (dump_file, "copying expression %d to reg %d\n",
2217 expr->bitmap_index, regno);
2221 /* Insert partially redundant expressions on edges in the CFG to make
2222 the expressions fully redundant. */
2224 static int
2225 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2227 int e, i, j, num_edges, set_size, did_insert = 0;
2228 sbitmap *inserted;
2230 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2231 if it reaches any of the deleted expressions. */
2233 set_size = pre_insert_map[0]->size;
2234 num_edges = NUM_EDGES (edge_list);
2235 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2236 sbitmap_vector_zero (inserted, num_edges);
2238 for (e = 0; e < num_edges; e++)
2240 int indx;
2241 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2243 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2245 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2247 for (j = indx;
2248 insert && j < (int) expr_hash_table.n_elems;
2249 j++, insert >>= 1)
2250 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2252 struct expr *expr = index_map[j];
2253 struct occr *occr;
2255 /* Now look at each deleted occurrence of this expression. */
2256 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2258 if (! occr->deleted_p)
2259 continue;
2261 /* Insert this expression on this edge if it would
2262 reach the deleted occurrence in BB. */
2263 if (!TEST_BIT (inserted[e], j))
2265 rtx insn;
2266 edge eg = INDEX_EDGE (edge_list, e);
2268 /* We can't insert anything on an abnormal and
2269 critical edge, so we insert the insn at the end of
2270 the previous block. There are several alternatives
2271 detailed in Morgan's book, p. 277 (sec. 10.5), for
2272 handling this situation. This one is easiest for
2273 now. */
2275 if (eg->flags & EDGE_ABNORMAL)
2276 insert_insn_end_basic_block (index_map[j], bb);
2277 else
2279 insn = process_insert_insn (index_map[j]);
2280 insert_insn_on_edge (insn, eg);
2283 if (dump_file)
2285 fprintf (dump_file, "PRE: edge (%d,%d), ",
2286 bb->index,
2287 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2288 fprintf (dump_file, "copy expression %d\n",
2289 expr->bitmap_index);
2292 update_ld_motion_stores (expr);
2293 SET_BIT (inserted[e], j);
2294 did_insert = 1;
2295 gcse_create_count++;
2302 sbitmap_vector_free (inserted);
2303 return did_insert;
2306 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2307 Given "old_reg <- expr" (INSN), instead of adding after it
2308 reaching_reg <- old_reg
2309 it's better to do the following:
2310 reaching_reg <- expr
2311 old_reg <- reaching_reg
2312 because this way copy propagation can discover additional PRE
2313 opportunities. But if this fails, we try the old way.
2314 When "expr" is a store, i.e.
2315 given "MEM <- old_reg", instead of adding after it
2316 reaching_reg <- old_reg
2317 it's better to add it before as follows:
2318 reaching_reg <- old_reg
2319 MEM <- reaching_reg. */
2321 static void
2322 pre_insert_copy_insn (struct expr *expr, rtx insn)
2324 rtx reg = expr->reaching_reg;
2325 int regno = REGNO (reg);
2326 int indx = expr->bitmap_index;
2327 rtx pat = PATTERN (insn);
2328 rtx set, first_set, new_insn;
2329 rtx old_reg;
2330 int i;
2332 /* This block matches the logic in hash_scan_insn. */
2333 switch (GET_CODE (pat))
2335 case SET:
2336 set = pat;
2337 break;
2339 case PARALLEL:
2340 /* Search through the parallel looking for the set whose
2341 source was the expression that we're interested in. */
2342 first_set = NULL_RTX;
2343 set = NULL_RTX;
2344 for (i = 0; i < XVECLEN (pat, 0); i++)
2346 rtx x = XVECEXP (pat, 0, i);
2347 if (GET_CODE (x) == SET)
2349 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2350 may not find an equivalent expression, but in this
2351 case the PARALLEL will have a single set. */
2352 if (first_set == NULL_RTX)
2353 first_set = x;
2354 if (expr_equiv_p (SET_SRC (x), expr->expr))
2356 set = x;
2357 break;
2362 gcc_assert (first_set);
2363 if (set == NULL_RTX)
2364 set = first_set;
2365 break;
2367 default:
2368 gcc_unreachable ();
2371 if (REG_P (SET_DEST (set)))
2373 old_reg = SET_DEST (set);
2374 /* Check if we can modify the set destination in the original insn. */
2375 if (validate_change (insn, &SET_DEST (set), reg, 0))
2377 new_insn = gen_move_insn (old_reg, reg);
2378 new_insn = emit_insn_after (new_insn, insn);
2380 else
2382 new_insn = gen_move_insn (reg, old_reg);
2383 new_insn = emit_insn_after (new_insn, insn);
2386 else /* This is possible only in case of a store to memory. */
2388 old_reg = SET_SRC (set);
2389 new_insn = gen_move_insn (reg, old_reg);
2391 /* Check if we can modify the set source in the original insn. */
2392 if (validate_change (insn, &SET_SRC (set), reg, 0))
2393 new_insn = emit_insn_before (new_insn, insn);
2394 else
2395 new_insn = emit_insn_after (new_insn, insn);
2398 gcse_create_count++;
2400 if (dump_file)
2401 fprintf (dump_file,
2402 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2403 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2404 INSN_UID (insn), regno);
2407 /* Copy available expressions that reach the redundant expression
2408 to `reaching_reg'. */
2410 static void
2411 pre_insert_copies (void)
2413 unsigned int i, added_copy;
2414 struct expr *expr;
2415 struct occr *occr;
2416 struct occr *avail;
2418 /* For each available expression in the table, copy the result to
2419 `reaching_reg' if the expression reaches a deleted one.
2421 ??? The current algorithm is rather brute force.
2422 Need to do some profiling. */
2424 for (i = 0; i < expr_hash_table.size; i++)
2425 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2427 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2428 we don't want to insert a copy here because the expression may not
2429 really be redundant. So only insert an insn if the expression was
2430 deleted. This test also avoids further processing if the
2431 expression wasn't deleted anywhere. */
2432 if (expr->reaching_reg == NULL)
2433 continue;
2435 /* Set when we add a copy for that expression. */
2436 added_copy = 0;
2438 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2440 if (! occr->deleted_p)
2441 continue;
2443 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2445 rtx insn = avail->insn;
2447 /* No need to handle this one if handled already. */
2448 if (avail->copied_p)
2449 continue;
2451 /* Don't handle this one if it's a redundant one. */
2452 if (INSN_DELETED_P (insn))
2453 continue;
2455 /* Or if the expression doesn't reach the deleted one. */
2456 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2457 expr,
2458 BLOCK_FOR_INSN (occr->insn)))
2459 continue;
2461 added_copy = 1;
2463 /* Copy the result of avail to reaching_reg. */
2464 pre_insert_copy_insn (expr, insn);
2465 avail->copied_p = 1;
2469 if (added_copy)
2470 update_ld_motion_stores (expr);
2474 /* Emit move from SRC to DEST noting the equivalence with expression computed
2475 in INSN. */
2477 static rtx
2478 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2480 rtx new_rtx;
2481 rtx set = single_set (insn), set2;
2482 rtx note;
2483 rtx eqv;
2485 /* This should never fail since we're creating a reg->reg copy
2486 we've verified to be valid. */
2488 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2490 /* Note the equivalence for local CSE pass. */
2491 set2 = single_set (new_rtx);
2492 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2493 return new_rtx;
2494 if ((note = find_reg_equal_equiv_note (insn)))
2495 eqv = XEXP (note, 0);
2496 else
2497 eqv = SET_SRC (set);
2499 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2501 return new_rtx;
2504 /* Delete redundant computations.
2505 Deletion is done by changing the insn to copy the `reaching_reg' of
2506 the expression into the result of the SET. It is left to later passes
2507 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2509 Return nonzero if a change is made. */
2511 static int
2512 pre_delete (void)
2514 unsigned int i;
2515 int changed;
2516 struct expr *expr;
2517 struct occr *occr;
2519 changed = 0;
2520 for (i = 0; i < expr_hash_table.size; i++)
2521 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2523 int indx = expr->bitmap_index;
2525 /* We only need to search antic_occr since we require ANTLOC != 0. */
2526 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2528 rtx insn = occr->insn;
2529 rtx set;
2530 basic_block bb = BLOCK_FOR_INSN (insn);
2532 /* We only delete insns that have a single_set. */
2533 if (TEST_BIT (pre_delete_map[bb->index], indx)
2534 && (set = single_set (insn)) != 0
2535 && dbg_cnt (pre_insn))
2537 /* Create a pseudo-reg to store the result of reaching
2538 expressions into. Get the mode for the new pseudo from
2539 the mode of the original destination pseudo. */
2540 if (expr->reaching_reg == NULL)
2541 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2543 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2544 delete_insn (insn);
2545 occr->deleted_p = 1;
2546 changed = 1;
2547 gcse_subst_count++;
2549 if (dump_file)
2551 fprintf (dump_file,
2552 "PRE: redundant insn %d (expression %d) in ",
2553 INSN_UID (insn), indx);
2554 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2555 bb->index, REGNO (expr->reaching_reg));
2561 return changed;
2564 /* Perform GCSE optimizations using PRE.
2565 This is called by one_pre_gcse_pass after all the dataflow analysis
2566 has been done.
2568 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
2569 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2570 Compiler Design and Implementation.
2572 ??? A new pseudo reg is created to hold the reaching expression. The nice
2573 thing about the classical approach is that it would try to use an existing
2574 reg. If the register can't be adequately optimized [i.e. we introduce
2575 reload problems], one could add a pass here to propagate the new register
2576 through the block.
2578 ??? We don't handle single sets in PARALLELs because we're [currently] not
2579 able to copy the rest of the parallel when we insert copies to create full
2580 redundancies from partial redundancies. However, there's no reason why we
2581 can't handle PARALLELs in the cases where there are no partial
2582 redundancies. */
2584 static int
2585 pre_gcse (struct edge_list *edge_list)
2587 unsigned int i;
2588 int did_insert, changed;
2589 struct expr **index_map;
2590 struct expr *expr;
2592 /* Compute a mapping from expression number (`bitmap_index') to
2593 hash table entry. */
2595 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2596 for (i = 0; i < expr_hash_table.size; i++)
2597 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2598 index_map[expr->bitmap_index] = expr;
2600 /* Delete the redundant insns first so that
2601 - we know what register to use for the new insns and for the other
2602 ones with reaching expressions
2603 - we know which insns are redundant when we go to create copies */
2605 changed = pre_delete ();
2606 did_insert = pre_edge_insert (edge_list, index_map);
2608 /* In other places with reaching expressions, copy the expression to the
2609 specially allocated pseudo-reg that reaches the redundant expr. */
2610 pre_insert_copies ();
2611 if (did_insert)
2613 commit_edge_insertions ();
2614 changed = 1;
2617 free (index_map);
2618 return changed;
2621 /* Top level routine to perform one PRE GCSE pass.
2623 Return nonzero if a change was made. */
2625 static int
2626 one_pre_gcse_pass (void)
2628 int changed = 0;
2630 gcse_subst_count = 0;
2631 gcse_create_count = 0;
2633 /* Return if there's nothing to do, or it is too expensive. */
2634 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2635 || is_too_expensive (_("PRE disabled")))
2636 return 0;
2638 /* We need alias. */
2639 init_alias_analysis ();
2641 bytes_used = 0;
2642 gcc_obstack_init (&gcse_obstack);
2643 alloc_gcse_mem ();
2645 alloc_hash_table (&expr_hash_table);
2646 add_noreturn_fake_exit_edges ();
2647 if (flag_gcse_lm)
2648 compute_ld_motion_mems ();
2650 compute_hash_table (&expr_hash_table);
2651 if (flag_gcse_lm)
2652 trim_ld_motion_mems ();
2653 if (dump_file)
2654 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2656 if (expr_hash_table.n_elems > 0)
2658 struct edge_list *edge_list;
2659 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2660 edge_list = compute_pre_data ();
2661 changed |= pre_gcse (edge_list);
2662 free_edge_list (edge_list);
2663 free_pre_mem ();
2666 if (flag_gcse_lm)
2667 free_ld_motion_mems ();
2668 remove_fake_exit_edges ();
2669 free_hash_table (&expr_hash_table);
2671 free_gcse_mem ();
2672 obstack_free (&gcse_obstack, NULL);
2674 /* We are finished with alias. */
2675 end_alias_analysis ();
2677 if (dump_file)
2679 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2680 current_function_name (), n_basic_blocks, bytes_used);
2681 fprintf (dump_file, "%d substs, %d insns created\n",
2682 gcse_subst_count, gcse_create_count);
2685 return changed;
2688 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2689 to INSN. If such notes are added to an insn which references a
2690 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2691 those notes, because the following loop optimization pass requires
2692 them. */
2694 /* ??? If there was a jump optimization pass after gcse and before loop,
2695 then we would not need to do this here, because jump would add the
2696 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2698 static void
2699 add_label_notes (rtx x, rtx insn)
2701 enum rtx_code code = GET_CODE (x);
2702 int i, j;
2703 const char *fmt;
2705 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2707 /* This code used to ignore labels that referred to dispatch tables to
2708 avoid flow generating (slightly) worse code.
2710 We no longer ignore such label references (see LABEL_REF handling in
2711 mark_jump_label for additional information). */
2713 /* There's no reason for current users to emit jump-insns with
2714 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2715 notes. */
2716 gcc_assert (!JUMP_P (insn));
2717 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2719 if (LABEL_P (XEXP (x, 0)))
2720 LABEL_NUSES (XEXP (x, 0))++;
2722 return;
2725 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2727 if (fmt[i] == 'e')
2728 add_label_notes (XEXP (x, i), insn);
2729 else if (fmt[i] == 'E')
2730 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2731 add_label_notes (XVECEXP (x, i, j), insn);
2735 /* Code Hoisting variables and subroutines. */
2737 /* Very busy expressions. */
2738 static sbitmap *hoist_vbein;
2739 static sbitmap *hoist_vbeout;
2741 /* ??? We could compute post dominators and run this algorithm in
2742 reverse to perform tail merging, doing so would probably be
2743 more effective than the tail merging code in jump.c.
2745 It's unclear if tail merging could be run in parallel with
2746 code hoisting. It would be nice. */
2748 /* Allocate vars used for code hoisting analysis. */
2750 static void
2751 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2753 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2754 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2755 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2757 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2758 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2761 /* Free vars used for code hoisting analysis. */
2763 static void
2764 free_code_hoist_mem (void)
2766 sbitmap_vector_free (antloc);
2767 sbitmap_vector_free (transp);
2768 sbitmap_vector_free (comp);
2770 sbitmap_vector_free (hoist_vbein);
2771 sbitmap_vector_free (hoist_vbeout);
2773 free_dominance_info (CDI_DOMINATORS);
2776 /* Compute the very busy expressions at entry/exit from each block.
2778 An expression is very busy if all paths from a given point
2779 compute the expression. */
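/* Expressed as dataflow equations, the fixed point computed below is
   roughly:

     VBEOUT(BB) = (intersection of VBEIN over the successors of BB)
                  | COMP(BB)
     VBEIN(BB)  = ANTLOC(BB) | (VBEOUT(BB) & TRANSP(BB))

   iterated in reverse basic-block order until nothing changes.  */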
2781 static void
2782 compute_code_hoist_vbeinout (void)
2784 int changed, passes;
2785 basic_block bb;
2787 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2788 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2790 passes = 0;
2791 changed = 1;
2793 while (changed)
2795 changed = 0;
2797 /* We scan the blocks in the reverse order to speed up
2798 the convergence. */
2799 FOR_EACH_BB_REVERSE (bb)
2801 if (bb->next_bb != EXIT_BLOCK_PTR)
2803 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2804 hoist_vbein, bb->index);
2806 /* Include expressions in VBEout that are calculated
2807 in BB and available at its end. */
2808 sbitmap_a_or_b (hoist_vbeout[bb->index],
2809 hoist_vbeout[bb->index], comp[bb->index]);
2812 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2813 antloc[bb->index],
2814 hoist_vbeout[bb->index],
2815 transp[bb->index]);
2818 passes++;
2821 if (dump_file)
2823 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2825 FOR_EACH_BB (bb)
2827 fprintf (dump_file, "vbein (%d): ", bb->index);
2828 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2829 fprintf (dump_file, "vbeout(%d): ", bb->index);
2830 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2835 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2837 static void
2838 compute_code_hoist_data (void)
2840 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2841 prune_expressions (false);
2842 compute_code_hoist_vbeinout ();
2843 calculate_dominance_info (CDI_DOMINATORS);
2844 if (dump_file)
2845 fprintf (dump_file, "\n");
2848 /* Determine if the expression identified by EXPR_INDEX would
2849 reach BB unimpaired if it was placed at the end of EXPR_BB.
2850 Stop the search if the expression would need to be moved more
2851 than DISTANCE instructions.
2853 It's unclear exactly what Muchnick meant by "unimpaired". It seems
2854 to me that the expression must either be computed or transparent in
2855 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2856 would allow the expression to be hoisted out of loops, even if
2857 the expression wasn't a loop invariant.
2859 Contrast this to reachability for PRE where an expression is
2860 considered reachable if *any* path reaches instead of *all*
2861 paths. */
2863 static int
2864 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2865 char *visited, int distance, int *bb_size)
2867 edge pred;
2868 edge_iterator ei;
2869 int visited_allocated_locally = 0;
2871 /* Terminate the search if distance, for which EXPR is allowed to move,
2872 is exhausted. */
2873 if (distance > 0)
2875 distance -= bb_size[bb->index];
2877 if (distance <= 0)
2878 return 0;
2880 else
2881 gcc_assert (distance == 0);
2883 if (visited == NULL)
2885 visited_allocated_locally = 1;
2886 visited = XCNEWVEC (char, last_basic_block);
2889 FOR_EACH_EDGE (pred, ei, bb->preds)
2891 basic_block pred_bb = pred->src;
2893 if (pred->src == ENTRY_BLOCK_PTR)
2894 break;
2895 else if (pred_bb == expr_bb)
2896 continue;
2897 else if (visited[pred_bb->index])
2898 continue;
2900 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2901 break;
2903 /* Not killed. */
2904 else
2906 visited[pred_bb->index] = 1;
2907 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2908 visited, distance, bb_size))
2909 break;
2912 if (visited_allocated_locally)
2913 free (visited);
2915 return (pred == NULL);
2918 /* Find occurrence in BB. */
2920 static struct occr *
2921 find_occr_in_bb (struct occr *occr, basic_block bb)
2923 /* Find the right occurrence of this expression. */
2924 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2925 occr = occr->next;
2927 return occr;
2930 /* Actually perform code hoisting. */
2932 static int
2933 hoist_code (void)
2935 basic_block bb, dominated;
2936 VEC (basic_block, heap) *dom_tree_walk;
2937 unsigned int dom_tree_walk_index;
2938 VEC (basic_block, heap) *domby;
2939 unsigned int i,j;
2940 struct expr **index_map;
2941 struct expr *expr;
2942 int *to_bb_head;
2943 int *bb_size;
2944 int changed = 0;
2946 /* Compute a mapping from expression number (`bitmap_index') to
2947 hash table entry. */
2949 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2950 for (i = 0; i < expr_hash_table.size; i++)
2951 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2952 index_map[expr->bitmap_index] = expr;
2954 /* Calculate sizes of basic blocks and note how far
2955 each instruction is from the start of its block. We then use this
2956 data to restrict the distance an expression can travel. */
2958 to_bb_head = XCNEWVEC (int, get_max_uid ());
2959 bb_size = XCNEWVEC (int, last_basic_block);
2961 FOR_EACH_BB (bb)
2963 rtx insn;
2964 int to_head;
2966 to_head = 0;
2967 FOR_BB_INSNS (bb, insn)
2969 /* Don't count debug instructions to avoid them affecting
2970 hoisting decisions. */
2971 if (NONDEBUG_INSN_P (insn))
2972 to_bb_head[INSN_UID (insn)] = to_head++;
2975 bb_size[bb->index] = to_head;
2978 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2979 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2980 == ENTRY_BLOCK_PTR->next_bb));
2982 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2983 ENTRY_BLOCK_PTR->next_bb);
2985 /* Walk over each basic block looking for potentially hoistable
2986 expressions; nothing gets hoisted from the entry block. */
2987 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2989 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2991 if (VEC_length (basic_block, domby) == 0)
2992 continue;
2994 /* Examine each expression that is very busy at the exit of this
2995 block. These are the potentially hoistable expressions. */
2996 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
2998 if (TEST_BIT (hoist_vbeout[bb->index], i))
3000 /* Current expression. */
3001 struct expr *expr = index_map[i];
3002 /* Number of occurrences of EXPR that can be hoisted to BB. */
3003 int hoistable = 0;
3004 /* Basic blocks that have occurrences reachable from BB. */
3005 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
3006 /* Occurrences reachable from BB. */
3007 VEC (occr_t, heap) *occrs_to_hoist = NULL;
3008 /* We want to insert the expression into BB only once, so
3009 note when we've inserted it. */
3010 int insn_inserted_p;
3011 occr_t occr;
3013 bitmap_initialize (from_bbs, 0);
3015 /* If an expression is computed in BB and is available at end of
3016 BB, hoist all occurrences dominated by BB to BB. */
3017 if (TEST_BIT (comp[bb->index], i))
3019 occr = find_occr_in_bb (expr->antic_occr, bb);
3021 if (occr)
3023 /* An occurrence might have already been deleted
3024 while processing a dominator of BB. */
3025 if (!occr->deleted_p)
3027 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3028 hoistable++;
3031 else
3032 hoistable++;
3035 /* We've found a potentially hoistable expression, now
3036 we look at every block BB dominates to see if it
3037 computes the expression. */
3038 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3040 int max_distance;
3042 /* Ignore self dominance. */
3043 if (bb == dominated)
3044 continue;
3045 /* We've found a dominated block, now see if it computes
3046 the busy expression and whether or not moving that
3047 expression to the "beginning" of that block is safe. */
3048 if (!TEST_BIT (antloc[dominated->index], i))
3049 continue;
3051 occr = find_occr_in_bb (expr->antic_occr, dominated);
3052 gcc_assert (occr);
3054 /* An occurrence might have already been deleted
3055 while processing a dominator of BB. */
3056 if (occr->deleted_p)
3057 continue;
3058 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3060 max_distance = expr->max_distance;
3061 if (max_distance > 0)
3062 /* Adjust MAX_DISTANCE to account for the fact that
3063 OCCR won't have to travel all of DOMINATED, but
3064 only part of it. */
3065 max_distance += (bb_size[dominated->index]
3066 - to_bb_head[INSN_UID (occr->insn)]);
3068 /* Note if the expression would reach the dominated block
3069 unimpaired if it was placed at the end of BB.
3071 Keep track of how many times this expression is hoistable
3072 from a dominated block into BB. */
3073 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3074 max_distance, bb_size))
3076 hoistable++;
3077 VEC_safe_push (occr_t, heap,
3078 occrs_to_hoist, occr);
3079 bitmap_set_bit (from_bbs, dominated->index);
3083 /* If we found more than one hoistable occurrence of this
3084 expression, then note it in the vector of expressions to
3085 hoist. It makes no sense to hoist things which are computed
3086 in only one BB, and doing so tends to pessimize register
3087 allocation. One could increase this value to try harder
3088 to avoid any possible code expansion due to register
3089 allocation issues; however experiments have shown that
3090 the vast majority of hoistable expressions are only movable
3091 from two successors, so raising this threshold is likely
3092 to nullify any benefit we get from code hoisting. */
3093 if (hoistable > 1 && dbg_cnt (hoist_insn))
3095 /* If (hoistable != VEC_length), then there is
3096 an occurrence of EXPR in BB itself. Don't waste
3097 time looking for LCA in this case. */
3098 if ((unsigned) hoistable
3099 == VEC_length (occr_t, occrs_to_hoist))
3101 basic_block lca;
3103 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3104 from_bbs);
3105 if (lca != bb)
3106 /* Punt, it's better to hoist these occurrences to
3107 LCA. */
3108 VEC_free (occr_t, heap, occrs_to_hoist);
3111 else
3112 /* Punt, no point hoisting a single occurrence. */
3113 VEC_free (occr_t, heap, occrs_to_hoist);
3115 insn_inserted_p = 0;
3117 /* Walk through occurrences of the I'th expression we want
3118 to hoist to BB and make the transformations. */
3119 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3121 rtx insn;
3122 rtx set;
3124 gcc_assert (!occr->deleted_p);
3126 insn = occr->insn;
3127 set = single_set (insn);
3128 gcc_assert (set);
3130 /* Create a pseudo-reg to store the result of reaching
3131 expressions into. Get the mode for the new pseudo
3132 from the mode of the original destination pseudo.
3134 It is important to use new pseudos whenever we
3135 emit a set. This will allow reload to use
3136 rematerialization for such registers. */
3137 if (!insn_inserted_p)
3138 expr->reaching_reg
3139 = gen_reg_rtx_and_attrs (SET_DEST (set));
3141 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3142 insn);
3143 delete_insn (insn);
3144 occr->deleted_p = 1;
3145 changed = 1;
3146 gcse_subst_count++;
3148 if (!insn_inserted_p)
3150 insert_insn_end_basic_block (expr, bb);
3151 insn_inserted_p = 1;
3155 VEC_free (occr_t, heap, occrs_to_hoist);
3156 bitmap_clear (from_bbs);
3159 VEC_free (basic_block, heap, domby);
3162 VEC_free (basic_block, heap, dom_tree_walk);
3163 free (bb_size);
3164 free (to_bb_head);
3165 free (index_map);
3167 return changed;
3170 /* Top level routine to perform one code hoisting (aka unification) pass.
3172 Return nonzero if a change was made. */
3174 static int
3175 one_code_hoisting_pass (void)
3177 int changed = 0;
3179 gcse_subst_count = 0;
3180 gcse_create_count = 0;
3182 /* Return if there's nothing to do, or it is too expensive. */
3183 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3184 || is_too_expensive (_("GCSE disabled")))
3185 return 0;
3187 doing_code_hoisting_p = true;
3189 /* We need alias. */
3190 init_alias_analysis ();
3192 bytes_used = 0;
3193 gcc_obstack_init (&gcse_obstack);
3194 alloc_gcse_mem ();
3196 alloc_hash_table (&expr_hash_table);
3197 compute_hash_table (&expr_hash_table);
3198 if (dump_file)
3199 dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
3201 if (expr_hash_table.n_elems > 0)
3203 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3204 compute_code_hoist_data ();
3205 changed = hoist_code ();
3206 free_code_hoist_mem ();
3209 free_hash_table (&expr_hash_table);
3210 free_gcse_mem ();
3211 obstack_free (&gcse_obstack, NULL);
3213 /* We are finished with alias. */
3214 end_alias_analysis ();
3216 if (dump_file)
3218 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3219 current_function_name (), n_basic_blocks, bytes_used);
3220 fprintf (dump_file, "%d substs, %d insns created\n",
3221 gcse_subst_count, gcse_create_count);
3224 doing_code_hoisting_p = false;
3226 return changed;
3229 /* Here we provide the things required to do store motion towards the exit.
3230 In order for this to be effective, gcse also needed to be taught how to
3231 move a load when it is killed only by a store to itself.
3233 int i;
3234 float a[10];
3236 void foo(float scale)
3238 for (i=0; i<10; i++)
3239 a[i] *= scale;
3242 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3243 the load out since it's live around the loop, and stored at the bottom
3244 of the loop.
3246 The 'Load Motion' referred to and implemented in this file is
3247 an enhancement to gcse which when using edge based LCM, recognizes
3248 this situation and allows gcse to move the load out of the loop.
3250 Once gcse has hoisted the load, store motion can then push this
3251 load towards the exit, and we end up with no loads or stores of 'i'
3252 in the loop. */
3254 static hashval_t
3255 pre_ldst_expr_hash (const void *p)
3257 int do_not_record_p = 0;
3258 const struct ls_expr *const x = (const struct ls_expr *) p;
3259 return
3260 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3263 static int
3264 pre_ldst_expr_eq (const void *p1, const void *p2)
3266 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3267 *const ptr2 = (const struct ls_expr *) p2;
3268 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3271 /* This will search the ldst list for a matching expression. If it
3272 doesn't find one, we create one and initialize it. */
3274 static struct ls_expr *
3275 ldst_entry (rtx x)
3277 int do_not_record_p = 0;
3278 struct ls_expr * ptr;
3279 unsigned int hash;
3280 void **slot;
3281 struct ls_expr e;
3283 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3284 NULL, /*have_reg_qty=*/false);
3286 e.pattern = x;
3287 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3288 if (*slot)
3289 return (struct ls_expr *)*slot;
3291 ptr = XNEW (struct ls_expr);
3293 ptr->next = pre_ldst_mems;
3294 ptr->expr = NULL;
3295 ptr->pattern = x;
3296 ptr->pattern_regs = NULL_RTX;
3297 ptr->loads = NULL_RTX;
3298 ptr->stores = NULL_RTX;
3299 ptr->reaching_reg = NULL_RTX;
3300 ptr->invalid = 0;
3301 ptr->index = 0;
3302 ptr->hash_index = hash;
3303 pre_ldst_mems = ptr;
3304 *slot = ptr;
3306 return ptr;
3309 /* Free up an individual ldst entry. */
3311 static void
3312 free_ldst_entry (struct ls_expr * ptr)
3314 free_INSN_LIST_list (& ptr->loads);
3315 free_INSN_LIST_list (& ptr->stores);
3317 free (ptr);
3320 /* Free up all memory associated with the ldst list. */
3322 static void
3323 free_ld_motion_mems (void)
3325 if (pre_ldst_table)
3326 htab_delete (pre_ldst_table);
3327 pre_ldst_table = NULL;
3329 while (pre_ldst_mems)
3331 struct ls_expr * tmp = pre_ldst_mems;
3333 pre_ldst_mems = pre_ldst_mems->next;
3335 free_ldst_entry (tmp);
3338 pre_ldst_mems = NULL;
3341 /* Dump debugging info about the ldst list. */
3343 static void
3344 print_ldst_list (FILE * file)
3346 struct ls_expr * ptr;
3348 fprintf (file, "LDST list: \n");
3350 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3352 fprintf (file, " Pattern (%3d): ", ptr->index);
3354 print_rtl (file, ptr->pattern);
3356 fprintf (file, "\n Loads : ");
3358 if (ptr->loads)
3359 print_rtl (file, ptr->loads);
3360 else
3361 fprintf (file, "(nil)");
3363 fprintf (file, "\n Stores : ");
3365 if (ptr->stores)
3366 print_rtl (file, ptr->stores);
3367 else
3368 fprintf (file, "(nil)");
3370 fprintf (file, "\n\n");
3373 fprintf (file, "\n");
3376 /* Returns 1 if X is in the list of ldst only expressions. */
3378 static struct ls_expr *
3379 find_rtx_in_ldst (rtx x)
3381 struct ls_expr e;
3382 void **slot;
3383 if (!pre_ldst_table)
3384 return NULL;
3385 e.pattern = x;
3386 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3387 if (!slot || ((struct ls_expr *)*slot)->invalid)
3388 return NULL;
3389 return (struct ls_expr *) *slot;
3392 /* Load Motion for loads which only kill themselves. */
3394 /* Return true if x, a MEM, is a simple access with no side effects.
3395 These are the types of loads we consider for the ld_motion list,
3396 otherwise we let the usual aliasing take care of it. */
3398 static int
3399 simple_mem (const_rtx x)
3401 if (MEM_VOLATILE_P (x))
3402 return 0;
3404 if (GET_MODE (x) == BLKmode)
3405 return 0;
3407 /* If we are handling exceptions, we must be careful with memory references
3408 that may trap. If we are not, the behavior is undefined, so we may just
3409 continue. */
3410 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3411 return 0;
3413 if (side_effects_p (x))
3414 return 0;
3416 /* Do not consider function arguments passed on stack. */
3417 if (reg_mentioned_p (stack_pointer_rtx, x))
3418 return 0;
3420 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3421 return 0;
3423 return 1;
3426 /* Make sure there isn't a buried reference in this pattern anywhere.
3427 If there is, invalidate the entry for it since we're not capable
3428 of fixing it up just yet. We have to be sure we know about ALL
3429 loads since the aliasing code will allow all entries in the
3430 ld_motion list to not-alias itself. If we miss a load, we will get
3431 the wrong value since gcse might common it and we won't know to
3432 fix it up. */
3434 static void
3435 invalidate_any_buried_refs (rtx x)
3437 const char * fmt;
3438 int i, j;
3439 struct ls_expr * ptr;
3441 /* Invalidate it in the list. */
3442 if (MEM_P (x) && simple_mem (x))
3444 ptr = ldst_entry (x);
3445 ptr->invalid = 1;
3448 /* Recursively process the insn. */
3449 fmt = GET_RTX_FORMAT (GET_CODE (x));
3451 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3453 if (fmt[i] == 'e')
3454 invalidate_any_buried_refs (XEXP (x, i));
3455 else if (fmt[i] == 'E')
3456 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3457 invalidate_any_buried_refs (XVECEXP (x, i, j));
3461 /* The maximum number of load/store motions that can be performed for one
3462 loop. */
3463 static unsigned maximum_lsm_limit;
3465 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3466 being defined as MEM loads and stores to symbols, with no side effects
3467 and no registers in the expression. For a MEM destination, we also
3468 check that the insn is still valid if we replace the destination with a
3469 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3470 which don't match this criteria, they are invalidated and trimmed out
3471 later. */
3473 static void
3474 compute_ld_motion_mems (void)
3476 struct ls_expr * ptr;
3477 basic_block bb;
3478 rtx insn;
3480 init_loop_lsm_limit ();
3482 pre_ldst_mems = NULL;
3483 pre_ldst_table
3484 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3486 FOR_EACH_BB (bb)
3488 struct loop *loop = bb->loop_father;
3489 unsigned ld_motion_limit = find_loop_lsm_limit (loop);
3490 unsigned ld_motion_count = 0;
3492 FOR_BB_INSNS (bb, insn)
3494 if (NONDEBUG_INSN_P (insn))
3496 if (GET_CODE (PATTERN (insn)) == SET)
3498 rtx src = SET_SRC (PATTERN (insn));
3499 rtx dest = SET_DEST (PATTERN (insn));
3501 /* Check for a simple LOAD... */
3502 if (MEM_P (src) && simple_mem (src))
3504 ptr = ldst_entry (src);
3505 if (REG_P (dest))
3506 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3507 else
3508 ptr->invalid = 1;
3510 else
3512 /* Make sure there isn't a buried load somewhere. */
3513 invalidate_any_buried_refs (src);
3516 /* Check for stores. Don't worry about aliased ones, they
3517 will block any movement we might do later. We only care
3518 about this exact pattern since those are the only
3519 circumstances in which we will ignore the aliasing info. */
3520 if (MEM_P (dest) && simple_mem (dest))
3522 ptr = ldst_entry (dest);
3524 if (! MEM_P (src)
3525 && GET_CODE (src) != ASM_OPERANDS
3526 /* Check for REG manually since want_to_gcse_p
3527 returns 0 for all REGs. */
3528 && can_assign_to_reg_without_clobbers_p (src)
3529 && ld_motion_count < ld_motion_limit)
3531 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3532 ld_motion_count++;
3534 else
3536 ptr->invalid = 1;
3537 if (dump_file && ld_motion_count >= ld_motion_limit)
3538 fprintf (dump_file,
3539 "suppress ld_motion in loop=%d bb=%d due"
3540 " to reg pressure.\n",
3541 loop->num, bb->index);
3546 else
3547 invalidate_any_buried_refs (PATTERN (insn));
3550 adjust_loop_lsm_limit (loop, ld_motion_count);
3553 fini_loop_lsm_limit ();
3556 typedef struct
3558 struct loop *loop;
3559 unsigned lsm_limit;
3560 } loop_lsm_limit_map;
3562 /* Hash table that maps a loop to its lsm_limit. */
3563 static htab_t loop_lsm_limit_map_htab = NULL;
3565 /* Hash function for loop_lsm_limit_map_htab. */
3567 static hashval_t
3568 loop_lsm_limit_map_hash (const void *p)
3570 const loop_lsm_limit_map *const x = (const loop_lsm_limit_map*) p;
3571 return htab_hash_pointer (x->loop);
3574 /* Equality function for loop_lsm_limit_map_htab. */
3576 static int
3577 loop_lsm_limit_map_eq (const void *p1, const void *p2)
3579 const loop_lsm_limit_map *const ptr1 = (const loop_lsm_limit_map *) p1;
3580 const loop_lsm_limit_map *const ptr2 = (const loop_lsm_limit_map *) p2;
3582 return htab_eq_pointer (ptr1->loop, ptr2->loop);
3585 /* Free one entry in loop_lsm_limit_map_htab. */
3587 static int
3588 free_loop_lsm_limit_map (void **slot, void *data ATTRIBUTE_UNUSED)
3590 free(*(loop_lsm_limit_map **) slot);
3591 return 1;
3594 /* Use the loop structure to find the enclosing loop for each basic block. */
3595 static struct loops loops_lsm;
3596 static bool dominator_info_avail_before_lsm_limit = false;
3598 /* Initialization function for suppressing excessive load/store motions.
3599 Build the loop structure so that we can count the number of motions
3600 performed. Set the maximum number of motions for a loop. */
3602 static void
3603 init_loop_lsm_limit (void)
3605 dominator_info_avail_before_lsm_limit = dom_info_available_p(CDI_DOMINATORS);
3606 flow_loops_find (&loops_lsm);
3608 loop_lsm_limit_map_htab = htab_create (59, loop_lsm_limit_map_hash,
3609 loop_lsm_limit_map_eq, NULL);
3611 /* avail_regs minus the induction variables */
3612 maximum_lsm_limit = target_avail_regs - 1;
3615 /* Cleanup function: destroy loop structure and free space. */
3617 static void
3618 fini_loop_lsm_limit (void)
3620 basic_block bb;
3622 flow_loops_free (&loops_lsm);
3624 if (!dominator_info_avail_before_lsm_limit)
3625 free_dominance_info (CDI_DOMINATORS);
3627 FOR_ALL_BB (bb)
3628 bb->loop_father = NULL;
3630 if (loop_lsm_limit_map_htab)
3632 htab_traverse (loop_lsm_limit_map_htab, free_loop_lsm_limit_map, NULL);
3633 htab_delete (loop_lsm_limit_map_htab);
3637 /* This routine tries to find the number of registers that
3638 (1) are live across the iteration, and
3639 (2) are referenced in the loop.
3640 Store motions performed in tree-lim fall into this category. */
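/* Roughly, the estimate computed below is

     count (DF_LR_IN (header) & DF_LR_OUT (latch)
            & (union of uses in the loop-body blocks that dominate
               the latch))

   i.e. the number of registers that are live around the back edge and
   are also referenced in blocks executed on every iteration.  */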
3642 static unsigned
3643 estimate_reg_pressure_before_lsm (struct loop *loop)
3645 basic_block *body;
3646 unsigned int i;
3647 unsigned int count;
3648 regset live_across_iteration;
3649 regset used_in_loop;
3650 basic_block header, latch;
3652 if (!loop)
3653 return 0;
3655 gcc_assert (loop != loops_lsm.tree_root);
3657 latch = loop->latch;
3658 /* Don't handle multiple latches. */
3659 if (!latch)
3660 return 0;
3662 header = loop->header;
3663 gcc_assert (header);
3665 live_across_iteration = ALLOC_REG_SET (&reg_obstack);
3666 COPY_REG_SET (live_across_iteration, DF_LR_IN (header));
3667 AND_REG_SET (live_across_iteration, DF_LR_OUT (latch));
3669 used_in_loop = ALLOC_REG_SET (&reg_obstack);
3670 CLEAR_REG_SET (used_in_loop);
3672 body = get_loop_body (loop);
3673 /* This loop computes a subset of the must-use set. */
3674 for (i=0; i<loop->num_nodes; i++)
3676 basic_block bb = *(body + i);
3678 if (dominated_by_p (CDI_DOMINATORS, latch, bb))
3679 IOR_REG_SET (used_in_loop, &(DF_LR_BB_INFO (bb)->use));
3683 AND_REG_SET (live_across_iteration, used_in_loop);
3685 count = bitmap_count_bits (live_across_iteration);
3687 FREE_REG_SET (live_across_iteration);
3688 FREE_REG_SET (used_in_loop);
3690 return count;
3693 /* Find the number of load/store motions we can perform without
3694 creating excessive register pressure in loop LOOP. */
3696 static unsigned
3697 find_loop_lsm_limit (struct loop *loop)
3699 void **slot;
3700 loop_lsm_limit_map t, *ret;
3701 unsigned reg_pressure;
3702 unsigned limit = maximum_lsm_limit;
3704 if (!loop || loop == loops_lsm.tree_root)
3705 return (unsigned)-1;
3707 t.loop = loop;
3708 slot = htab_find_slot (loop_lsm_limit_map_htab, &t, NO_INSERT);
3709 if (slot)
3710 return (*(loop_lsm_limit_map**)slot)->lsm_limit;
3712 /* Insert this loop and initialize it. */
3713 reg_pressure = estimate_reg_pressure_before_lsm (loop);
3714 ret = (loop_lsm_limit_map*) gmalloc (sizeof (loop_lsm_limit_map));
3715 gcc_assert (ret);
3717 limit = (limit > reg_pressure ? limit - reg_pressure : 0);
3718 ret->loop = loop;
3719 ret->lsm_limit = limit;
3720 slot = htab_find_slot (loop_lsm_limit_map_htab, ret, INSERT);
3721 gcc_assert (slot);
3722 (*slot) = (void**)ret;
3724 return limit;
3727 /* Adjust the lsm limit based on the instances that have been
3728 performed. */
3730 static void
3731 adjust_loop_lsm_limit (struct loop *loop, unsigned performed)
3733 void **slot;
3734 loop_lsm_limit_map t;
3735 unsigned limit;
3737 if (performed == 0 || loop == NULL || loop == loops_lsm.tree_root)
3738 return;
3740 t.loop = loop;
3741 slot = htab_find_slot (loop_lsm_limit_map_htab, &t, NO_INSERT);
3742 gcc_assert (slot);
3743 limit = (*(loop_lsm_limit_map**)slot)->lsm_limit;
3744 gcc_assert (limit >= performed);
3745 (*(loop_lsm_limit_map**)slot)->lsm_limit = limit - performed;
3748 /* Remove any references that have been either invalidated or are not in the
3749 expression list for pre gcse. */
3751 static void
3752 trim_ld_motion_mems (void)
3754 struct ls_expr * * last = & pre_ldst_mems;
3755 struct ls_expr * ptr = pre_ldst_mems;
3757 while (ptr != NULL)
3759 struct expr * expr;
3761 /* Delete if entry has been made invalid. */
3762 if (! ptr->invalid)
3764 /* Delete if we cannot find this mem in the expression list. */
3765 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3767 for (expr = expr_hash_table.table[hash];
3768 expr != NULL;
3769 expr = expr->next_same_hash)
3770 if (expr_equiv_p (expr->expr, ptr->pattern))
3771 break;
3773 else
3774 expr = (struct expr *) 0;
3776 if (expr)
3778 /* Set the expression field if we are keeping it. */
3779 ptr->expr = expr;
3780 last = & ptr->next;
3781 ptr = ptr->next;
3783 else
3785 *last = ptr->next;
3786 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3787 free_ldst_entry (ptr);
3788 ptr = * last;
3792 /* Show the world what we've found. */
3793 if (dump_file && pre_ldst_mems != NULL)
3794 print_ldst_list (dump_file);
3797 /* This routine will take an expression which we are replacing with
3798 a reaching register, and update any stores that are needed if
3799 that expression is in the ld_motion list. Stores are updated by
3800 copying their SRC to the reaching register, and then storing
3801 the reaching register into the store location. This keeps the
3802 correct value in the reaching register for the loads. */
3804 static void
3805 update_ld_motion_stores (struct expr * expr)
3807 struct ls_expr * mem_ptr;
3809 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3811 /* We can try to find just the REACHED stores, but it shouldn't
3812 matter to set the reaching reg everywhere... some might be
3813 dead and should be eliminated later. */
3815 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3816 where reg is the reaching reg used in the load. We checked in
3817 compute_ld_motion_mems that we can replace (set mem expr) with
3818 (set reg expr) in that insn. */
3819 rtx list = mem_ptr->stores;
3821 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3822 {
3823 rtx insn = XEXP (list, 0);
3824 rtx pat = PATTERN (insn);
3825 rtx src = SET_SRC (pat);
3826 rtx reg = expr->reaching_reg;
3827 rtx copy;
3829 /* If we've already copied it, continue. */
3830 if (expr->reaching_reg == src)
3831 continue;
3833 if (dump_file)
3834 {
3835 fprintf (dump_file, "PRE: store updated with reaching reg ");
3836 print_rtl (dump_file, reg);
3837 fprintf (dump_file, ":\n ");
3838 print_inline_rtx (dump_file, insn, 8);
3839 fprintf (dump_file, "\n");
3840 }
3842 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3843 emit_insn_before (copy, insn);
3844 SET_SRC (pat) = reg;
3845 df_insn_rescan (insn);
3847 /* un-recognize this pattern since it's probably different now. */
3848 INSN_CODE (insn) = -1;
3849 gcse_create_count++;
3850 }
3851 }
3852 }
3854 /* Return true if the graph is too expensive to optimize. PASS is the
3855 optimization about to be performed. */
3857 static bool
3858 is_too_expensive (const char *pass)
3859 {
3860 /* Trying to perform global optimizations on flow graphs which have
3861 a high connectivity will take a long time and is unlikely to be
3862 particularly useful.
3864 In normal circumstances a cfg should have about twice as many
3865 edges as blocks. But we do not want to punish small functions
3866 which have a couple of switch statements. Rather than simply
3867 thresholding the number of blocks, we use something with a more
3868 graceful degradation. */
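/* For example, a function with 1000 basic blocks only trips this check
   once it has more than 24000 edges, i.e. an average of 24 edges per
   block.  */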
3869 if (n_edges > 20000 + n_basic_blocks * 4)
3870 {
3871 warning (OPT_Wdisabled_optimization,
3872 "%s: %d basic blocks and %d edges/basic block",
3873 pass, n_basic_blocks, n_edges / n_basic_blocks);
3875 return true;
3876 }
3878 /* If allocating memory for the dataflow bitmaps would take up too much
3879 storage it's better just to disable the optimization. */
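/* The product below is roughly the size in bytes of one register-indexed
   bitmap (SBITMAP_SET_SIZE (max_reg_num ()) elements of SBITMAP_ELT_TYPE)
   for every basic block.  */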
3880 if ((n_basic_blocks
3881 * SBITMAP_SET_SIZE (max_reg_num ())
3882 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3883 {
3884 warning (OPT_Wdisabled_optimization,
3885 "%s: %d basic blocks and %d registers",
3886 pass, n_basic_blocks, max_reg_num ());
3888 return true;
3889 }
3891 return false;
3892 }
3894 /* All the passes implemented in this file. Each pass has its
3895 own gate and execute function, and at the end of the file a
3896 pass definition for passes.c.
3898 We do not construct an accurate cfg in functions which call
3899 setjmp, so none of these passes runs if the function calls
3900 setjmp.
3901 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3903 static bool
3904 gate_rtl_pre (void)
3905 {
3906 return optimize > 0 && flag_gcse
3907 && !cfun->calls_setjmp
3908 && optimize_function_for_speed_p (cfun)
3909 && dbg_cnt (pre);
3910 }
3912 static unsigned int
3913 execute_rtl_pre (void)
3914 {
3915 int changed;
3916 delete_unreachable_blocks ();
3917 df_analyze ();
3918 changed = one_pre_gcse_pass ();
3919 flag_rerun_cse_after_global_opts |= changed;
3920 if (changed)
3921 cleanup_cfg (0);
3922 return 0;
3923 }
3925 static bool
3926 gate_rtl_hoist (void)
3927 {
3928 return optimize > 0 && flag_gcse
3929 && !cfun->calls_setjmp
3930 /* It does not make sense to run code hoisting unless we are optimizing
3931 for code size -- it rarely makes programs faster, and can make them
3932 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3933 && optimize_function_for_size_p (cfun)
3934 && dbg_cnt (hoist);
3935 }
3937 static unsigned int
3938 execute_rtl_hoist (void)
3939 {
3940 int changed;
3941 delete_unreachable_blocks ();
3942 df_analyze ();
3943 changed = one_code_hoisting_pass ();
3944 flag_rerun_cse_after_global_opts |= changed;
3945 if (changed)
3946 cleanup_cfg (0);
3947 return 0;
3948 }
3950 struct rtl_opt_pass pass_rtl_pre =
3951 {
3952 {
3953 RTL_PASS,
3954 "rtl pre", /* name */
3955 gate_rtl_pre, /* gate */
3956 execute_rtl_pre, /* execute */
3957 NULL, /* sub */
3958 NULL, /* next */
3959 0, /* static_pass_number */
3960 TV_PRE, /* tv_id */
3961 PROP_cfglayout, /* properties_required */
3962 0, /* properties_provided */
3963 0, /* properties_destroyed */
3964 0, /* todo_flags_start */
3965 TODO_df_finish | TODO_verify_rtl_sharing |
3966 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3967 }
3968 };
3970 struct rtl_opt_pass pass_rtl_hoist =
3971 {
3972 {
3973 RTL_PASS,
3974 "hoist", /* name */
3975 gate_rtl_hoist, /* gate */
3976 execute_rtl_hoist, /* execute */
3977 NULL, /* sub */
3978 NULL, /* next */
3979 0, /* static_pass_number */
3980 TV_HOIST, /* tv_id */
3981 PROP_cfglayout, /* properties_required */
3982 0, /* properties_provided */
3983 0, /* properties_destroyed */
3984 0, /* todo_flags_start */
3985 TODO_df_finish | TODO_verify_rtl_sharing |
3986 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3987 }
3988 };
3990 #include "gt-gcse.h"