gcc/gcse.c
1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* TODO
21 - reordering of memory allocation and freeing to be more space efficient
22 - calc rough register pressure information and use the info to drive all
 23     kinds of code motion (including code hoisting) in a unified way. */
26 /* References searched while implementing this.
28 Compilers Principles, Techniques and Tools
29 Aho, Sethi, Ullman
30 Addison-Wesley, 1988
32 Global Optimization by Suppression of Partial Redundancies
33 E. Morel, C. Renvoise
 34    Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
36 A Portable Machine-Independent Global Optimizer - Design and Measurements
37 Frederick Chow
38 Stanford Ph.D. thesis, Dec. 1983
40 A Fast Algorithm for Code Movement Optimization
41 D.M. Dhamdhere
42 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
44 A Solution to a Problem with Morel and Renvoise's
45 Global Optimization by Suppression of Partial Redundancies
46 K-H Drechsler, M.P. Stadel
47 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
49 Practical Adaptation of the Global Optimization
50 Algorithm of Morel and Renvoise
51 D.M. Dhamdhere
52 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
54 Efficiently Computing Static Single Assignment Form and the Control
55 Dependence Graph
56 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
57 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
59 Lazy Code Motion
60 J. Knoop, O. Ruthing, B. Steffen
61 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
63 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
64 Time for Reducible Flow Control
65 Thomas Ball
66 ACM Letters on Programming Languages and Systems,
67 Vol. 2, Num. 1-4, Mar-Dec 1993
69 An Efficient Representation for Sparse Sets
70 Preston Briggs, Linda Torczon
71 ACM Letters on Programming Languages and Systems,
72 Vol. 2, Num. 1-4, Mar-Dec 1993
74 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
75 K-H Drechsler, M.P. Stadel
76 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
78 Partial Dead Code Elimination
79 J. Knoop, O. Ruthing, B. Steffen
80 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
82 Effective Partial Redundancy Elimination
83 P. Briggs, K.D. Cooper
84 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
86 The Program Structure Tree: Computing Control Regions in Linear Time
87 R. Johnson, D. Pearson, K. Pingali
88 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
90 Optimal Code Motion: Theory and Practice
91 J. Knoop, O. Ruthing, B. Steffen
92 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
94 The power of assignment motion
95 J. Knoop, O. Ruthing, B. Steffen
96 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
98 Global code motion / global value numbering
99 C. Click
100 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
102 Value Driven Redundancy Elimination
103 L.T. Simpson
104 Rice University Ph.D. thesis, Apr. 1996
106 Value Numbering
107 L.T. Simpson
108 Massively Scalar Compiler Project, Rice University, Sep. 1996
110 High Performance Compilers for Parallel Computing
111 Michael Wolfe
112 Addison-Wesley, 1996
114 Advanced Compiler Design and Implementation
115 Steven Muchnick
116 Morgan Kaufmann, 1997
118 Building an Optimizing Compiler
119 Robert Morgan
120 Digital Press, 1998
122 People wishing to speed up the code here should read:
123 Elimination Algorithms for Data Flow Analysis
124 B.G. Ryder, M.C. Paull
125 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
127 How to Analyze Large Programs Efficiently and Informatively
128 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
129 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
131 People wishing to do something different can find various possibilities
 132    in the above papers and elsewhere. */
135 #include "config.h"
136 #include "system.h"
137 #include "coretypes.h"
138 #include "tm.h"
139 #include "diagnostic-core.h"
140 #include "toplev.h"
142 #include "hard-reg-set.h"
143 #include "rtl.h"
144 #include "tree.h"
145 #include "tm_p.h"
146 #include "regs.h"
147 #include "ira.h"
148 #include "flags.h"
149 #include "insn-config.h"
150 #include "recog.h"
151 #include "basic-block.h"
152 #include "hashtab.h"
153 #include "hash-set.h"
154 #include "vec.h"
155 #include "machmode.h"
156 #include "input.h"
157 #include "function.h"
158 #include "expr.h"
159 #include "except.h"
160 #include "ggc.h"
161 #include "params.h"
162 #include "cselib.h"
163 #include "intl.h"
164 #include "obstack.h"
165 #include "tree-pass.h"
166 #include "hash-table.h"
167 #include "df.h"
168 #include "dbgcnt.h"
169 #include "target.h"
170 #include "gcse.h"
172 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
173 are a superset of those done by classic GCSE.
175 Two passes of copy/constant propagation are done around PRE or hoisting
176 because the first one enables more GCSE and the second one helps to clean
177 up the copies that PRE and HOIST create. This is needed more for PRE than
178 for HOIST because code hoisting will try to use an existing register
179 containing the common subexpression rather than create a new one. This is
180 harder to do for PRE because of the code motion (which HOIST doesn't do).
182 Expressions we are interested in GCSE-ing are of the form
183 (set (pseudo-reg) (expression)).
184 Function want_to_gcse_p says what these are.
186 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
187 This allows PRE to hoist expressions that are expressed in multiple insns,
188 such as complex address calculations (e.g. for PIC code, or loads with a
189 high part and a low part).
191 PRE handles moving invariant expressions out of loops (by treating them as
192 partially redundant).
194 **********************
196 We used to support multiple passes but there are diminishing returns in
197 doing so. The first pass usually makes 90% of the changes that are doable.
198 A second pass can make a few more changes made possible by the first pass.
199 Experiments show any further passes don't make enough changes to justify
200 the expense.
202 A study of spec92 using an unlimited number of passes:
203 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
204 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
205 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
207 It was found doing copy propagation between each pass enables further
208 substitutions.
210 This study was done before expressions in REG_EQUAL notes were added as
211 candidate expressions for optimization, and before the GIMPLE optimizers
 212    were added. Probably, running multiple passes is even less efficient now than
213 at the time when the study was conducted.
215 PRE is quite expensive in complicated functions because the DFA can take
216 a while to converge. Hence we only perform one pass.
218 **********************
220 The steps for PRE are:
222 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
224 2) Perform the data flow analysis for PRE.
226 3) Delete the redundant instructions
228 4) Insert the required copies [if any] that make the partially
229 redundant instructions fully redundant.
231 5) For other reaching expressions, insert an instruction to copy the value
232 to a newly created pseudo that will reach the redundant instruction.
234 The deletion is done first so that when we do insertions we
235 know which pseudo reg to use.
237 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
238 argue it is not. The number of iterations for the algorithm to converge
239 is typically 2-4 so I don't view it as that expensive (relatively speaking).
241 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
242 we create. To make an expression reach the place where it's redundant,
243 the result of the expression is copied to a new register, and the redundant
244 expression is deleted by replacing it with this new register. Classic GCSE
 245    doesn't have this problem as much, because it computes the reaching defs of
246 each register in each block and thus can try to use an existing
247 register. */
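/* As a minimal source-level sketch (invented example, not from any test
   case) of what the steps above achieve for an expression "a + b" that is
   partially redundant:

       before PRE                      after PRE
       if (c)                          if (c)
         x = a + b;                      { t = a + b; x = t; }
       else                            else
         d = 1;                          { d = 1; t = a + b; }   <- inserted
       y = a + b;                       y = t;                   <- now a copy

   Inserting the expression on the "else" path makes the final computation
   fully redundant, so it is deleted and replaced by a copy from the new
   pseudo T; the second CPROP pass then cleans up the copies.  */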
249 /* GCSE global vars. */
251 struct target_gcse default_target_gcse;
252 #if SWITCHABLE_TARGET
253 struct target_gcse *this_target_gcse = &default_target_gcse;
254 #endif
256 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
257 int flag_rerun_cse_after_global_opts;
259 /* An obstack for our working variables. */
260 static struct obstack gcse_obstack;
262 /* Hash table of expressions. */
264 struct gcse_expr
266 /* The expression. */
267 rtx expr;
268 /* Index in the available expression bitmaps. */
269 int bitmap_index;
270 /* Next entry with the same hash. */
271 struct gcse_expr *next_same_hash;
272 /* List of anticipatable occurrences in basic blocks in the function.
273 An "anticipatable occurrence" is one that is the first occurrence in the
274 basic block, the operands are not modified in the basic block prior
275 to the occurrence and the output is not used between the start of
276 the block and the occurrence. */
277 struct gcse_occr *antic_occr;
 278   /* List of available occurrences in basic blocks in the function.
279 An "available occurrence" is one that is the last occurrence in the
280 basic block and the operands are not modified by following statements in
281 the basic block [including this insn]. */
282 struct gcse_occr *avail_occr;
283 /* Non-null if the computation is PRE redundant.
284 The value is the newly created pseudo-reg to record a copy of the
285 expression in all the places that reach the redundant copy. */
286 rtx reaching_reg;
287 /* Maximum distance in instructions this expression can travel.
288 We avoid moving simple expressions for more than a few instructions
289 to keep register pressure under control.
290 A value of "0" removes restrictions on how far the expression can
291 travel. */
292 int max_distance;
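/* A small invented example of the two occurrence lists above.  In a block

       r10 = r1 + r2     <- antic_occr: first occurrence, operands not yet
       r1  = ...            modified and the result not used before it
       r11 = r1 + r2     <- avail_occr: last occurrence, available only if
       ...                  r1 and r2 are not modified again in the block

   the first computation makes the expression locally anticipatable, and the
   last one makes it locally available provided its operands survive
   unchanged to the end of the block.  */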
295 /* Occurrence of an expression.
296 There is one per basic block. If a pattern appears more than once the
297 last appearance is used [or first for anticipatable expressions]. */
299 struct gcse_occr
301 /* Next occurrence of this expression. */
302 struct gcse_occr *next;
303 /* The insn that computes the expression. */
304 rtx_insn *insn;
305 /* Nonzero if this [anticipatable] occurrence has been deleted. */
306 char deleted_p;
307 /* Nonzero if this [available] occurrence has been copied to
308 reaching_reg. */
309 /* ??? This is mutually exclusive with deleted_p, so they could share
310 the same byte. */
311 char copied_p;
314 typedef struct gcse_occr *occr_t;
316 /* Expression hash tables.
317 Each hash table is an array of buckets.
318 ??? It is known that if it were an array of entries, structure elements
319 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
320 not clear whether in the final analysis a sufficient amount of memory would
321 be saved as the size of the available expression bitmaps would be larger
322 [one could build a mapping table without holes afterwards though].
323 Someday I'll perform the computation and figure it out. */
325 struct gcse_hash_table_d
327 /* The table itself.
328 This is an array of `expr_hash_table_size' elements. */
329 struct gcse_expr **table;
331 /* Size of the hash table, in elements. */
332 unsigned int size;
334 /* Number of hash table elements. */
335 unsigned int n_elems;
338 /* Expression hash table. */
339 static struct gcse_hash_table_d expr_hash_table;
341 /* This is a list of expressions which are MEMs and will be used by load
342 or store motion.
 343    Load motion tracks MEMs which aren't killed by anything except themselves,
344 i.e. loads and stores to a single location.
345 We can then allow movement of these MEM refs with a little special
346 allowance. (all stores copy the same value to the reaching reg used
347 for the loads). This means all values used to store into memory must have
348 no side effects so we can re-issue the setter value. */
350 struct ls_expr
352 struct gcse_expr * expr; /* Gcse expression reference for LM. */
353 rtx pattern; /* Pattern of this mem. */
354 rtx pattern_regs; /* List of registers mentioned by the mem. */
355 rtx_insn_list *loads; /* INSN list of loads seen. */
356 rtx_insn_list *stores; /* INSN list of stores seen. */
357 struct ls_expr * next; /* Next in the list. */
358 int invalid; /* Invalid for some reason. */
359 int index; /* If it maps to a bitmap index. */
360 unsigned int hash_index; /* Index when in a hash table. */
361 rtx reaching_reg; /* Register to use when re-writing. */
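/* A rough sketch of the intent (illustrative only): for a MEM such as *p
   that is only ever loaded and stored as a whole, every store can copy its
   value to the same reaching register that the loads then read from, e.g.

       *p = a;        becomes        t = a;  *p = t;
       ...                           ...
       b  = *p;                      b  = t;

   which is why the stored values must have no side effects: they may be
   re-issued to set the reaching register T.  */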
364 /* Head of the list of load/store memory refs. */
365 static struct ls_expr * pre_ldst_mems = NULL;
367 struct pre_ldst_expr_hasher : typed_noop_remove <ls_expr>
369 typedef ls_expr value_type;
370 typedef value_type compare_type;
371 static inline hashval_t hash (const value_type *);
372 static inline bool equal (const value_type *, const compare_type *);
375 /* Hashtable helpers. */
376 inline hashval_t
377 pre_ldst_expr_hasher::hash (const value_type *x)
379 int do_not_record_p = 0;
380 return
381 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
384 static int expr_equiv_p (const_rtx, const_rtx);
386 inline bool
387 pre_ldst_expr_hasher::equal (const value_type *ptr1,
388 const compare_type *ptr2)
390 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
393 /* Hashtable for the load/store memory refs. */
394 static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;
396 /* Bitmap containing one bit for each register in the program.
397 Used when performing GCSE to track which registers have been set since
398 the start of the basic block. */
399 static regset reg_set_bitmap;
401 /* Array, indexed by basic block number for a list of insns which modify
402 memory within that block. */
403 static vec<rtx_insn *> *modify_mem_list;
404 static bitmap modify_mem_list_set;
406 typedef struct modify_pair_s
408 rtx dest; /* A MEM. */
409 rtx dest_addr; /* The canonical address of `dest'. */
410 } modify_pair;
413 /* This array parallels modify_mem_list, except that it stores MEMs
414 being set and their canonicalized memory addresses. */
415 static vec<modify_pair> *canon_modify_mem_list;
417 /* Bitmap indexed by block numbers to record which blocks contain
418 function calls. */
419 static bitmap blocks_with_calls;
421 /* Various variables for statistics gathering. */
423 /* Memory used in a pass.
424 This isn't intended to be absolutely precise. Its intent is only
425 to keep an eye on memory usage. */
426 static int bytes_used;
428 /* GCSE substitutions made. */
429 static int gcse_subst_count;
430 /* Number of copy instructions created. */
431 static int gcse_create_count;
433 /* Doing code hoisting. */
434 static bool doing_code_hoisting_p = false;
436 /* For available exprs */
437 static sbitmap *ae_kill;
439 /* Data stored for each basic block. */
440 struct bb_data
442 /* Maximal register pressure inside basic block for given register class
443 (defined only for the pressure classes). */
444 int max_reg_pressure[N_REG_CLASSES];
445 /* Recorded register pressure of basic block before trying to hoist
446 an expression. Will be used to restore the register pressure
447 if the expression should not be hoisted. */
448 int old_pressure;
449 /* Recorded register live_in info of basic block during code hoisting
450 process. BACKUP is used to record live_in info before trying to
451 hoist an expression, and will be used to restore LIVE_IN if the
452 expression should not be hoisted. */
453 bitmap live_in, backup;
456 #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
458 static basic_block curr_bb;
460 /* Current register pressure for each pressure class. */
461 static int curr_reg_pressure[N_REG_CLASSES];
464 static void compute_can_copy (void);
465 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
466 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
467 static void *gcse_alloc (unsigned long);
468 static void alloc_gcse_mem (void);
469 static void free_gcse_mem (void);
470 static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
471 static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
472 static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
473 static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
474 static int want_to_gcse_p (rtx, int *);
475 static int oprs_unchanged_p (const_rtx, const rtx_insn *, int);
476 static int oprs_anticipatable_p (const_rtx, const rtx_insn *);
477 static int oprs_available_p (const_rtx, const rtx_insn *);
478 static void insert_expr_in_table (rtx, enum machine_mode, rtx_insn *, int, int,
479 int, struct gcse_hash_table_d *);
480 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
481 static void record_last_reg_set_info (rtx, int);
482 static void record_last_mem_set_info (rtx_insn *);
483 static void record_last_set_info (rtx, const_rtx, void *);
484 static void compute_hash_table (struct gcse_hash_table_d *);
485 static void alloc_hash_table (struct gcse_hash_table_d *);
486 static void free_hash_table (struct gcse_hash_table_d *);
487 static void compute_hash_table_work (struct gcse_hash_table_d *);
488 static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
489 static void compute_transp (const_rtx, int, sbitmap *);
490 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
491 struct gcse_hash_table_d *);
492 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
493 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
494 static void canon_list_insert (rtx, const_rtx, void *);
495 static void alloc_pre_mem (int, int);
496 static void free_pre_mem (void);
497 static struct edge_list *compute_pre_data (void);
498 static int pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
499 basic_block);
500 static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
501 static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
502 static void pre_insert_copies (void);
503 static int pre_delete (void);
504 static int pre_gcse (struct edge_list *);
505 static int one_pre_gcse_pass (void);
506 static void add_label_notes (rtx, rtx);
507 static void alloc_code_hoist_mem (int, int);
508 static void free_code_hoist_mem (void);
509 static void compute_code_hoist_vbeinout (void);
510 static void compute_code_hoist_data (void);
511 static int should_hoist_expr_to_dom (basic_block, struct gcse_expr *, basic_block,
512 sbitmap, int, int *, enum reg_class,
513 int *, bitmap, rtx_insn *);
514 static int hoist_code (void);
515 static enum reg_class get_regno_pressure_class (int regno, int *nregs);
516 static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
517 static int one_code_hoisting_pass (void);
518 static rtx_insn *process_insert_insn (struct gcse_expr *);
519 static int pre_edge_insert (struct edge_list *, struct gcse_expr **);
520 static int pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
521 basic_block, char *);
522 static struct ls_expr * ldst_entry (rtx);
523 static void free_ldst_entry (struct ls_expr *);
524 static void free_ld_motion_mems (void);
525 static void print_ldst_list (FILE *);
526 static struct ls_expr * find_rtx_in_ldst (rtx);
527 static int simple_mem (const_rtx);
528 static void invalidate_any_buried_refs (rtx);
529 static void compute_ld_motion_mems (void);
530 static void trim_ld_motion_mems (void);
531 static void update_ld_motion_stores (struct gcse_expr *);
532 static void clear_modify_mem_tables (void);
533 static void free_modify_mem_tables (void);
534 static rtx gcse_emit_move_after (rtx, rtx, rtx_insn *);
535 static bool is_too_expensive (const char *);
537 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
538 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
540 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
541 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
543 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
544 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
546 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
547 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
549 /* Misc. utilities. */
551 #define can_copy \
552 (this_target_gcse->x_can_copy)
553 #define can_copy_init_p \
554 (this_target_gcse->x_can_copy_init_p)
556 /* Compute which modes support reg/reg copy operations. */
558 static void
559 compute_can_copy (void)
561 int i;
562 #ifndef AVOID_CCMODE_COPIES
563 rtx reg, insn;
564 #endif
565 memset (can_copy, 0, NUM_MACHINE_MODES);
567 start_sequence ();
568 for (i = 0; i < NUM_MACHINE_MODES; i++)
569 if (GET_MODE_CLASS (i) == MODE_CC)
571 #ifdef AVOID_CCMODE_COPIES
572 can_copy[i] = 0;
573 #else
574 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
575 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
576 if (recog (PATTERN (insn), insn, NULL) >= 0)
577 can_copy[i] = 1;
578 #endif
580 else
581 can_copy[i] = 1;
583 end_sequence ();
586 /* Returns whether the mode supports reg/reg copy operations. */
588 bool
589 can_copy_p (enum machine_mode mode)
591 if (! can_copy_init_p)
593 compute_can_copy ();
594 can_copy_init_p = true;
597 return can_copy[mode] != 0;
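/* For example, hash_scan_set below only enters a SET in the hash table
   when can_copy_p succeeds for the mode of the value being set, since
   GCSE must be able to perform a reg/reg copy of that value.  */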
600 /* Cover function to xmalloc to record bytes allocated. */
602 static void *
603 gmalloc (size_t size)
605 bytes_used += size;
606 return xmalloc (size);
609 /* Cover function to xcalloc to record bytes allocated. */
611 static void *
612 gcalloc (size_t nelem, size_t elsize)
614 bytes_used += nelem * elsize;
615 return xcalloc (nelem, elsize);
618 /* Cover function to obstack_alloc. */
620 static void *
621 gcse_alloc (unsigned long size)
623 bytes_used += size;
624 return obstack_alloc (&gcse_obstack, size);
627 /* Allocate memory for the reg/memory set tracking tables.
628 This is called at the start of each pass. */
630 static void
631 alloc_gcse_mem (void)
633 /* Allocate vars to track sets of regs. */
634 reg_set_bitmap = ALLOC_REG_SET (NULL);
636 /* Allocate array to keep a list of insns which modify memory in each
637 basic block. The two typedefs are needed to work around the
638 pre-processor limitation with template types in macro arguments. */
639 typedef vec<rtx_insn *> vec_rtx_heap;
640 typedef vec<modify_pair> vec_modify_pair_heap;
641 modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
642 canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
643 last_basic_block_for_fn (cfun));
644 modify_mem_list_set = BITMAP_ALLOC (NULL);
645 blocks_with_calls = BITMAP_ALLOC (NULL);
648 /* Free memory allocated by alloc_gcse_mem. */
650 static void
651 free_gcse_mem (void)
653 FREE_REG_SET (reg_set_bitmap);
655 free_modify_mem_tables ();
656 BITMAP_FREE (modify_mem_list_set);
657 BITMAP_FREE (blocks_with_calls);
660 /* Compute the local properties of each recorded expression.
662 Local properties are those that are defined by the block, irrespective of
663 other blocks.
665 An expression is transparent in a block if its operands are not modified
666 in the block.
668 An expression is computed (locally available) in a block if it is computed
 669    at least once and the expression would contain the same value if the
670 computation was moved to the end of the block.
672 An expression is locally anticipatable in a block if it is computed at
 673    least once and the expression would contain the same value if the computation
674 was moved to the beginning of the block.
 676    We call this routine for PRE and code hoisting; both compute
 677    basically the same information and thus can easily share this code.
679 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
680 properties. If NULL, then it is not necessary to compute or record that
681 particular property.
683 TABLE controls which hash table to look at. */
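/* A tiny invented example of the three local properties.  In a block

       r1 = ...
       r9 = r1 + r2
       ...              (r1 and r2 unchanged from here to the block end)

   the expression "r1 + r2" is locally available (COMP) because its
   operands are not modified after the computation, it is not locally
   anticipatable (ANTLOC) because r1 is set before the computation, and it
   is not transparent (TRANSP) because one of its operands is modified
   somewhere in the block.  */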
685 static void
686 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
687 struct gcse_hash_table_d *table)
689 unsigned int i;
691 /* Initialize any bitmaps that were passed in. */
692 if (transp)
694 bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
697 if (comp)
698 bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
699 if (antloc)
700 bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));
702 for (i = 0; i < table->size; i++)
704 struct gcse_expr *expr;
706 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
708 int indx = expr->bitmap_index;
709 struct gcse_occr *occr;
711 /* The expression is transparent in this block if it is not killed.
712 We start by assuming all are transparent [none are killed], and
713 then reset the bits for those that are. */
714 if (transp)
715 compute_transp (expr->expr, indx, transp);
717 /* The occurrences recorded in antic_occr are exactly those that
718 we want to set to nonzero in ANTLOC. */
719 if (antloc)
720 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
722 bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
724 /* While we're scanning the table, this is a good place to
725 initialize this. */
726 occr->deleted_p = 0;
729 /* The occurrences recorded in avail_occr are exactly those that
730 we want to set to nonzero in COMP. */
731 if (comp)
732 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
734 bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
736 /* While we're scanning the table, this is a good place to
737 initialize this. */
738 occr->copied_p = 0;
741 /* While we're scanning the table, this is a good place to
742 initialize this. */
743 expr->reaching_reg = 0;
748 /* Hash table support. */
750 struct reg_avail_info
752 basic_block last_bb;
753 int first_set;
754 int last_set;
757 static struct reg_avail_info *reg_avail_info;
758 static basic_block current_bb;
760 /* See whether X, the source of a set, is something we want to consider for
761 GCSE. */
763 static int
764 want_to_gcse_p (rtx x, int *max_distance_ptr)
766 #ifdef STACK_REGS
767 /* On register stack architectures, don't GCSE constants from the
768 constant pool, as the benefits are often swamped by the overhead
769 of shuffling the register stack between basic blocks. */
770 if (IS_STACK_MODE (GET_MODE (x)))
771 x = avoid_constant_pool_reference (x);
772 #endif
774 /* GCSE'ing constants:
776 We do not specifically distinguish between constant and non-constant
777 expressions in PRE and Hoist. We use set_src_cost below to limit
778 the maximum distance simple expressions can travel.
780 Nevertheless, constants are much easier to GCSE, and, hence,
781 it is easy to overdo the optimizations. Usually, excessive PRE and
 782    Hoisting of constants leads to increased register pressure.
 784    RA can deal with this by rematerializing some of the constants.
 785    Therefore, it is important that the back-end generates sets of constants
 786    in a way that allows reload to rematerialize them under high register
787 pressure, i.e., a pseudo register with REG_EQUAL to constant
788 is set only once. Failing to do so will result in IRA/reload
789 spilling such constants under high register pressure instead of
790 rematerializing them. */
792 switch (GET_CODE (x))
794 case REG:
795 case SUBREG:
796 case CALL:
797 return 0;
799 CASE_CONST_ANY:
800 if (!doing_code_hoisting_p)
801 /* Do not PRE constants. */
802 return 0;
804 /* FALLTHRU */
806 default:
807 if (doing_code_hoisting_p)
808 /* PRE doesn't implement max_distance restriction. */
810 int cost;
811 int max_distance;
813 gcc_assert (!optimize_function_for_speed_p (cfun)
814 && optimize_function_for_size_p (cfun));
815 cost = set_src_cost (x, 0);
817 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
819 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
820 if (max_distance == 0)
821 return 0;
823 gcc_assert (max_distance > 0);
825 else
826 max_distance = 0;
828 if (max_distance_ptr)
829 *max_distance_ptr = max_distance;
832 return can_assign_to_reg_without_clobbers_p (x);
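/* A rough numeric sketch of the distance limit above, using hypothetical
   parameter values rather than the build defaults: with a cost/distance
   ratio of 10 and an expression whose set_src_cost is 4, the expression
   may travel at most (10 * 4) / 10 = 4 insns.  An expression so cheap that
   the computed distance rounds to 0 is rejected outright, while one costing
   at least COSTS_N_INSNS (GCSE_UNRESTRICTED_COST) gets max_distance == 0,
   i.e. no restriction at all.  */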
836 /* Used internally by can_assign_to_reg_without_clobbers_p. */
838 static GTY(()) rtx_insn *test_insn;
840 /* Return true if we can assign X to a pseudo register such that the
841 resulting insn does not result in clobbering a hard register as a
842 side-effect.
844 Additionally, if the target requires it, check that the resulting insn
845 can be copied. If it cannot, this means that X is special and probably
846 has hidden side-effects we don't want to mess with.
848 This function is typically used by code motion passes, to verify
849 that it is safe to insert an insn without worrying about clobbering
850 maybe live hard regs. */
852 bool
853 can_assign_to_reg_without_clobbers_p (rtx x)
855 int num_clobbers = 0;
856 int icode;
857 bool can_assign = false;
859 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
860 if (general_operand (x, GET_MODE (x)))
861 return 1;
862 else if (GET_MODE (x) == VOIDmode)
863 return 0;
865 /* Otherwise, check if we can make a valid insn from it. First initialize
866 our test insn if we haven't already. */
867 if (test_insn == 0)
869 test_insn
870 = make_insn_raw (gen_rtx_SET (VOIDmode,
871 gen_rtx_REG (word_mode,
872 FIRST_PSEUDO_REGISTER * 2),
873 const0_rtx));
874 SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
875 INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
878 /* Now make an insn like the one we would make when GCSE'ing and see if
879 valid. */
880 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
881 SET_SRC (PATTERN (test_insn)) = x;
883 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
885 /* If the test insn is valid and doesn't need clobbers, and the target also
886 has no objections, we're good. */
887 if (icode >= 0
888 && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
889 && ! (targetm.cannot_copy_insn_p
890 && targetm.cannot_copy_insn_p (test_insn)))
891 can_assign = true;
893 /* Make sure test_insn doesn't have any pointers into GC space. */
894 SET_SRC (PATTERN (test_insn)) = NULL_RTX;
896 return can_assign;
899 /* Return nonzero if the operands of expression X are unchanged from the
900 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
901 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
903 static int
904 oprs_unchanged_p (const_rtx x, const rtx_insn *insn, int avail_p)
906 int i, j;
907 enum rtx_code code;
908 const char *fmt;
910 if (x == 0)
911 return 1;
913 code = GET_CODE (x);
914 switch (code)
916 case REG:
918 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
920 if (info->last_bb != current_bb)
921 return 1;
922 if (avail_p)
923 return info->last_set < DF_INSN_LUID (insn);
924 else
925 return info->first_set >= DF_INSN_LUID (insn);
928 case MEM:
929 if (! flag_gcse_lm
930 || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
931 x, avail_p))
932 return 0;
933 else
934 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
936 case PRE_DEC:
937 case PRE_INC:
938 case POST_DEC:
939 case POST_INC:
940 case PRE_MODIFY:
941 case POST_MODIFY:
942 return 0;
944 case PC:
945 case CC0: /*FIXME*/
946 case CONST:
947 CASE_CONST_ANY:
948 case SYMBOL_REF:
949 case LABEL_REF:
950 case ADDR_VEC:
951 case ADDR_DIFF_VEC:
952 return 1;
954 default:
955 break;
958 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
960 if (fmt[i] == 'e')
962 /* If we are about to do the last recursive call needed at this
963 level, change it into iteration. This function is called enough
964 to be worth it. */
965 if (i == 0)
966 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
968 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
969 return 0;
971 else if (fmt[i] == 'E')
972 for (j = 0; j < XVECLEN (x, i); j++)
973 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
974 return 0;
977 return 1;
980 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
982 struct mem_conflict_info
984 /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
985 see if a memory store conflicts with this memory load. */
986 const_rtx mem;
988 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
989 references. */
990 bool conflict;
993 /* DEST is the output of an instruction. If it is a memory reference and
994 possibly conflicts with the load found in DATA, then communicate this
995 information back through DATA. */
997 static void
998 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
999 void *data)
1001 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
1003 while (GET_CODE (dest) == SUBREG
1004 || GET_CODE (dest) == ZERO_EXTRACT
1005 || GET_CODE (dest) == STRICT_LOW_PART)
1006 dest = XEXP (dest, 0);
1008 /* If DEST is not a MEM, then it will not conflict with the load. Note
1009 that function calls are assumed to clobber memory, but are handled
1010 elsewhere. */
1011 if (! MEM_P (dest))
1012 return;
1014 /* If we are setting a MEM in our list of specially recognized MEMs,
1015 don't mark as killed this time. */
1016 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
1018 if (!find_rtx_in_ldst (dest))
1019 mci->conflict = true;
1020 return;
1023 if (true_dependence (dest, GET_MODE (dest), mci->mem))
1024 mci->conflict = true;
1027 /* Return nonzero if the expression in X (a memory reference) is killed
1028 in block BB before or after the insn with the LUID in UID_LIMIT.
1029 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
1030 before UID_LIMIT.
1032 To check the entire block, set UID_LIMIT to max_uid + 1 and
1033 AVAIL_P to 0. */
1035 static int
1036 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
1037 int avail_p)
1039 vec<rtx_insn *> list = modify_mem_list[bb->index];
1040 rtx_insn *setter;
1041 unsigned ix;
 1043   /* If this is a readonly MEM, then we aren't going to be changing it. */
1044 if (MEM_READONLY_P (x))
1045 return 0;
1047 FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
1049 struct mem_conflict_info mci;
1051 /* Ignore entries in the list that do not apply. */
1052 if ((avail_p
1053 && DF_INSN_LUID (setter) < uid_limit)
1054 || (! avail_p
1055 && DF_INSN_LUID (setter) > uid_limit))
1056 continue;
1058 /* If SETTER is a call everything is clobbered. Note that calls
1059 to pure functions are never put on the list, so we need not
1060 worry about them. */
1061 if (CALL_P (setter))
1062 return 1;
1064 /* SETTER must be an INSN of some kind that sets memory. Call
1065 note_stores to examine each hunk of memory that is modified. */
1066 mci.mem = x;
1067 mci.conflict = false;
1068 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1069 if (mci.conflict)
1070 return 1;
1072 return 0;
1075 /* Return nonzero if the operands of expression X are unchanged from
1076 the start of INSN's basic block up to but not including INSN. */
1078 static int
1079 oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
1081 return oprs_unchanged_p (x, insn, 0);
1084 /* Return nonzero if the operands of expression X are unchanged from
1085 INSN to the end of INSN's basic block. */
1087 static int
1088 oprs_available_p (const_rtx x, const rtx_insn *insn)
1090 return oprs_unchanged_p (x, insn, 1);
1093 /* Hash expression X.
1095 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1096 indicating if a volatile operand is found or if the expression contains
1097 something we don't want to insert in the table. HASH_TABLE_SIZE is
1098 the current size of the hash table to be probed. */
1100 static unsigned int
1101 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1102 int hash_table_size)
1104 unsigned int hash;
1106 *do_not_record_p = 0;
1108 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1109 return hash % hash_table_size;
1112 /* Return nonzero if exp1 is equivalent to exp2. */
1114 static int
1115 expr_equiv_p (const_rtx x, const_rtx y)
1117 return exp_equiv_p (x, y, 0, true);
1120 /* Insert expression X in INSN in the hash TABLE.
1121 If it is already present, record it as the last occurrence in INSN's
1122 basic block.
1124 MODE is the mode of the value X is being stored into.
1125 It is only used if X is a CONST_INT.
1127 ANTIC_P is nonzero if X is an anticipatable expression.
1128 AVAIL_P is nonzero if X is an available expression.
1130 MAX_DISTANCE is the maximum distance in instructions this expression can
1131 be moved. */
1133 static void
1134 insert_expr_in_table (rtx x, enum machine_mode mode, rtx_insn *insn,
1135 int antic_p,
1136 int avail_p, int max_distance, struct gcse_hash_table_d *table)
1138 int found, do_not_record_p;
1139 unsigned int hash;
1140 struct gcse_expr *cur_expr, *last_expr = NULL;
1141 struct gcse_occr *antic_occr, *avail_occr;
1143 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1145 /* Do not insert expression in table if it contains volatile operands,
1146 or if hash_expr determines the expression is something we don't want
1147 to or can't handle. */
1148 if (do_not_record_p)
1149 return;
1151 cur_expr = table->table[hash];
1152 found = 0;
1154 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1156 /* If the expression isn't found, save a pointer to the end of
1157 the list. */
1158 last_expr = cur_expr;
1159 cur_expr = cur_expr->next_same_hash;
1162 if (! found)
1164 cur_expr = GOBNEW (struct gcse_expr);
1165 bytes_used += sizeof (struct gcse_expr);
1166 if (table->table[hash] == NULL)
1167 /* This is the first pattern that hashed to this index. */
1168 table->table[hash] = cur_expr;
1169 else
1170 /* Add EXPR to end of this hash chain. */
1171 last_expr->next_same_hash = cur_expr;
1173 /* Set the fields of the expr element. */
1174 cur_expr->expr = x;
1175 cur_expr->bitmap_index = table->n_elems++;
1176 cur_expr->next_same_hash = NULL;
1177 cur_expr->antic_occr = NULL;
1178 cur_expr->avail_occr = NULL;
1179 gcc_assert (max_distance >= 0);
1180 cur_expr->max_distance = max_distance;
1182 else
1183 gcc_assert (cur_expr->max_distance == max_distance);
1185 /* Now record the occurrence(s). */
1186 if (antic_p)
1188 antic_occr = cur_expr->antic_occr;
1190 if (antic_occr
1191 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1192 antic_occr = NULL;
1194 if (antic_occr)
1195 /* Found another instance of the expression in the same basic block.
1196 Prefer the currently recorded one. We want the first one in the
1197 block and the block is scanned from start to end. */
1198 ; /* nothing to do */
1199 else
1201 /* First occurrence of this expression in this basic block. */
1202 antic_occr = GOBNEW (struct gcse_occr);
1203 bytes_used += sizeof (struct gcse_occr);
1204 antic_occr->insn = insn;
1205 antic_occr->next = cur_expr->antic_occr;
1206 antic_occr->deleted_p = 0;
1207 cur_expr->antic_occr = antic_occr;
1211 if (avail_p)
1213 avail_occr = cur_expr->avail_occr;
1215 if (avail_occr
1216 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1218 /* Found another instance of the expression in the same basic block.
1219 Prefer this occurrence to the currently recorded one. We want
1220 the last one in the block and the block is scanned from start
1221 to end. */
1222 avail_occr->insn = insn;
1224 else
1226 /* First occurrence of this expression in this basic block. */
1227 avail_occr = GOBNEW (struct gcse_occr);
1228 bytes_used += sizeof (struct gcse_occr);
1229 avail_occr->insn = insn;
1230 avail_occr->next = cur_expr->avail_occr;
1231 avail_occr->deleted_p = 0;
1232 cur_expr->avail_occr = avail_occr;
1237 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1239 static void
1240 hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
1242 rtx src = SET_SRC (set);
1243 rtx dest = SET_DEST (set);
1244 rtx note;
1246 if (GET_CODE (src) == CALL)
1247 hash_scan_call (src, insn, table);
1249 else if (REG_P (dest))
1251 unsigned int regno = REGNO (dest);
1252 int max_distance = 0;
1254 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1256 This allows us to do a single GCSE pass and still eliminate
1257 redundant constants, addresses or other expressions that are
1258 constructed with multiple instructions.
1260 However, keep the original SRC if INSN is a simple reg-reg move.
1261 In this case, there will almost always be a REG_EQUAL note on the
1262 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1263 for INSN, we miss copy propagation opportunities and we perform the
1264 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1265 do more than one PRE GCSE pass.
1267 Note that this does not impede profitable constant propagations. We
1268 "look through" reg-reg sets in lookup_avail_set. */
1269 note = find_reg_equal_equiv_note (insn);
1270 if (note != 0
1271 && REG_NOTE_KIND (note) == REG_EQUAL
1272 && !REG_P (src)
1273 && want_to_gcse_p (XEXP (note, 0), NULL))
1274 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1276 /* Only record sets of pseudo-regs in the hash table. */
1277 if (regno >= FIRST_PSEUDO_REGISTER
1278 /* Don't GCSE something if we can't do a reg/reg copy. */
1279 && can_copy_p (GET_MODE (dest))
1280 /* GCSE commonly inserts instruction after the insn. We can't
1281 do that easily for EH edges so disable GCSE on these for now. */
1282 /* ??? We can now easily create new EH landing pads at the
1283 gimple level, for splitting edges; there's no reason we
1284 can't do the same thing at the rtl level. */
1285 && !can_throw_internal (insn)
1286 /* Is SET_SRC something we want to gcse? */
1287 && want_to_gcse_p (src, &max_distance)
1288 /* Don't CSE a nop. */
1289 && ! set_noop_p (set)
1290 /* Don't GCSE if it has attached REG_EQUIV note.
 1291         At this point, only function parameters should have
 1292         REG_EQUIV notes, and if the argument slot is used somewhere
 1293         explicitly, it means the address of the parameter has been taken,
1294 so we should not extend the lifetime of the pseudo. */
1295 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1297 /* An expression is not anticipatable if its operands are
1298 modified before this insn or if this is not the only SET in
1299 this insn. The latter condition does not have to mean that
1300 SRC itself is not anticipatable, but we just will not be
1301 able to handle code motion of insns with multiple sets. */
1302 int antic_p = oprs_anticipatable_p (src, insn)
1303 && !multiple_sets (insn);
1304 /* An expression is not available if its operands are
1305 subsequently modified, including this insn. It's also not
1306 available if this is a branch, because we can't insert
1307 a set after the branch. */
1308 int avail_p = (oprs_available_p (src, insn)
1309 && ! JUMP_P (insn));
1311 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1312 max_distance, table);
 1315   /* In the case of a store we want to consider the memory value as available
 1316      in the REG stored in that memory. This makes it possible to remove
 1317      redundant loads due to stores to the same location. */
1318 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1320 unsigned int regno = REGNO (src);
1321 int max_distance = 0;
1323 /* Only record sets of pseudo-regs in the hash table. */
1324 if (regno >= FIRST_PSEUDO_REGISTER
1325 /* Don't GCSE something if we can't do a reg/reg copy. */
1326 && can_copy_p (GET_MODE (src))
1327 /* GCSE commonly inserts instruction after the insn. We can't
1328 do that easily for EH edges so disable GCSE on these for now. */
1329 && !can_throw_internal (insn)
1330 /* Is SET_DEST something we want to gcse? */
1331 && want_to_gcse_p (dest, &max_distance)
1332 /* Don't CSE a nop. */
1333 && ! set_noop_p (set)
1334 /* Don't GCSE if it has attached REG_EQUIV note.
 1335         At this point, only function parameters should have
 1336         REG_EQUIV notes, and if the argument slot is used somewhere
 1337         explicitly, it means the address of the parameter has been taken,
1338 so we should not extend the lifetime of the pseudo. */
1339 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1340 || ! MEM_P (XEXP (note, 0))))
1342 /* Stores are never anticipatable. */
1343 int antic_p = 0;
1344 /* An expression is not available if its operands are
1345 subsequently modified, including this insn. It's also not
1346 available if this is a branch, because we can't insert
1347 a set after the branch. */
1348 int avail_p = oprs_available_p (dest, insn)
1349 && ! JUMP_P (insn);
1351 /* Record the memory expression (DEST) in the hash table. */
1352 insert_expr_in_table (dest, GET_MODE (dest), insn,
1353 antic_p, avail_p, max_distance, table);
1358 static void
1359 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1360 struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1362 /* Currently nothing to do. */
1365 static void
1366 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1367 struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1369 /* Currently nothing to do. */
1372 /* Process INSN and add hash table entries as appropriate. */
1374 static void
1375 hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
1377 rtx pat = PATTERN (insn);
1378 int i;
1380 /* Pick out the sets of INSN and for other forms of instructions record
1381 what's been modified. */
1383 if (GET_CODE (pat) == SET)
1384 hash_scan_set (pat, insn, table);
1386 else if (GET_CODE (pat) == CLOBBER)
1387 hash_scan_clobber (pat, insn, table);
1389 else if (GET_CODE (pat) == CALL)
1390 hash_scan_call (pat, insn, table);
1392 else if (GET_CODE (pat) == PARALLEL)
1393 for (i = 0; i < XVECLEN (pat, 0); i++)
1395 rtx x = XVECEXP (pat, 0, i);
1397 if (GET_CODE (x) == SET)
1398 hash_scan_set (x, insn, table);
1399 else if (GET_CODE (x) == CLOBBER)
1400 hash_scan_clobber (x, insn, table);
1401 else if (GET_CODE (x) == CALL)
1402 hash_scan_call (x, insn, table);
1406 /* Dump the hash table TABLE to file FILE under the name NAME. */
1408 static void
1409 dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
1411 int i;
1412 /* Flattened out table, so it's printed in proper order. */
1413 struct gcse_expr **flat_table;
1414 unsigned int *hash_val;
1415 struct gcse_expr *expr;
1417 flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
1418 hash_val = XNEWVEC (unsigned int, table->n_elems);
1420 for (i = 0; i < (int) table->size; i++)
1421 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1423 flat_table[expr->bitmap_index] = expr;
1424 hash_val[expr->bitmap_index] = i;
1427 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1428 name, table->size, table->n_elems);
1430 for (i = 0; i < (int) table->n_elems; i++)
1431 if (flat_table[i] != 0)
1433 expr = flat_table[i];
1434 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1435 expr->bitmap_index, hash_val[i], expr->max_distance);
1436 print_rtl (file, expr->expr);
1437 fprintf (file, "\n");
1440 fprintf (file, "\n");
1442 free (flat_table);
1443 free (hash_val);
1446 /* Record register first/last/block set information for REGNO in INSN.
1448 first_set records the first place in the block where the register
1449 is set and is used to compute "anticipatability".
1451 last_set records the last place in the block where the register
1452 is set and is used to compute "availability".
1454 last_bb records the block for which first_set and last_set are
1455 valid, as a quick test to invalidate them. */
1457 static void
1458 record_last_reg_set_info (rtx insn, int regno)
1460 struct reg_avail_info *info = &reg_avail_info[regno];
1461 int luid = DF_INSN_LUID (insn);
1463 info->last_set = luid;
1464 if (info->last_bb != current_bb)
1466 info->last_bb = current_bb;
1467 info->first_set = luid;
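/* A short worked example (LUIDs invented): if a register is set at LUIDs 5
   and 12 within the current block, then first_set == 5 and last_set == 12.
   oprs_unchanged_p then treats a use of that register at LUID 3 as
   unchanged for anticipatability (5 >= 3) and a use at LUID 15 as
   unchanged for availability (12 < 15), while a use at LUID 8 passes
   neither test.  */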
1471 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1472 Note we store a pair of elements in the list, so they have to be
1473 taken off pairwise. */
1475 static void
1476 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1477 void * v_insn)
1479 rtx dest_addr, insn;
1480 int bb;
1481 modify_pair pair;
1483 while (GET_CODE (dest) == SUBREG
1484 || GET_CODE (dest) == ZERO_EXTRACT
1485 || GET_CODE (dest) == STRICT_LOW_PART)
1486 dest = XEXP (dest, 0);
1488 /* If DEST is not a MEM, then it will not conflict with a load. Note
1489 that function calls are assumed to clobber memory, but are handled
1490 elsewhere. */
1492 if (! MEM_P (dest))
1493 return;
1495 dest_addr = get_addr (XEXP (dest, 0));
1496 dest_addr = canon_rtx (dest_addr);
1497 insn = (rtx) v_insn;
1498 bb = BLOCK_FOR_INSN (insn)->index;
1500 pair.dest = dest;
1501 pair.dest_addr = dest_addr;
1502 canon_modify_mem_list[bb].safe_push (pair);
1505 /* Record memory modification information for INSN. We do not actually care
1506 about the memory location(s) that are set, or even how they are set (consider
1507 a CALL_INSN). We merely need to record which insns modify memory. */
1509 static void
1510 record_last_mem_set_info (rtx_insn *insn)
1512 int bb;
1514 if (! flag_gcse_lm)
1515 return;
1517 /* load_killed_in_block_p will handle the case of calls clobbering
1518 everything. */
1519 bb = BLOCK_FOR_INSN (insn)->index;
1520 modify_mem_list[bb].safe_push (insn);
1521 bitmap_set_bit (modify_mem_list_set, bb);
1523 if (CALL_P (insn))
1524 bitmap_set_bit (blocks_with_calls, bb);
1525 else
1526 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1529 /* Called from compute_hash_table via note_stores to handle one
1530 SET or CLOBBER in an insn. DATA is really the instruction in which
1531 the SET is taking place. */
1533 static void
1534 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1536 rtx_insn *last_set_insn = (rtx_insn *) data;
1538 if (GET_CODE (dest) == SUBREG)
1539 dest = SUBREG_REG (dest);
1541 if (REG_P (dest))
1542 record_last_reg_set_info (last_set_insn, REGNO (dest));
1543 else if (MEM_P (dest)
1544 /* Ignore pushes, they clobber nothing. */
1545 && ! push_operand (dest, GET_MODE (dest)))
1546 record_last_mem_set_info (last_set_insn);
1549 /* Top level function to create an expression hash table.
1551 Expression entries are placed in the hash table if
1552 - they are of the form (set (pseudo-reg) src),
1553 - src is something we want to perform GCSE on,
1554 - none of the operands are subsequently modified in the block
1556 Currently src must be a pseudo-reg or a const_int.
1558 TABLE is the table computed. */
1560 static void
1561 compute_hash_table_work (struct gcse_hash_table_d *table)
1563 int i;
1565 /* re-Cache any INSN_LIST nodes we have allocated. */
1566 clear_modify_mem_tables ();
1567 /* Some working arrays used to track first and last set in each block. */
1568 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1570 for (i = 0; i < max_reg_num (); ++i)
1571 reg_avail_info[i].last_bb = NULL;
1573 FOR_EACH_BB_FN (current_bb, cfun)
1575 rtx_insn *insn;
1576 unsigned int regno;
1578 /* First pass over the instructions records information used to
1579 determine when registers and memory are first and last set. */
1580 FOR_BB_INSNS (current_bb, insn)
1582 if (!NONDEBUG_INSN_P (insn))
1583 continue;
1585 if (CALL_P (insn))
1587 hard_reg_set_iterator hrsi;
1588 EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call,
1589 0, regno, hrsi)
1590 record_last_reg_set_info (insn, regno);
1592 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1593 record_last_mem_set_info (insn);
1596 note_stores (PATTERN (insn), record_last_set_info, insn);
1599 /* The next pass builds the hash table. */
1600 FOR_BB_INSNS (current_bb, insn)
1601 if (NONDEBUG_INSN_P (insn))
1602 hash_scan_insn (insn, table);
1605 free (reg_avail_info);
1606 reg_avail_info = NULL;
1609 /* Allocate space for the set/expr hash TABLE.
 1610    The number of insns in the function is used to determine the number of buckets. */
1612 static void
1613 alloc_hash_table (struct gcse_hash_table_d *table)
1615 int n;
1617 n = get_max_insn_count ();
1619 table->size = n / 4;
1620 if (table->size < 11)
1621 table->size = 11;
1623 /* Attempt to maintain efficient use of hash table.
1624 Making it an odd number is simplest for now.
1625 ??? Later take some measurements. */
1626 table->size |= 1;
1627 n = table->size * sizeof (struct gcse_expr *);
1628 table->table = GNEWVAR (struct gcse_expr *, n);
1631 /* Free things allocated by alloc_hash_table. */
1633 static void
1634 free_hash_table (struct gcse_hash_table_d *table)
1636 free (table->table);
1639 /* Compute the expression hash table TABLE. */
1641 static void
1642 compute_hash_table (struct gcse_hash_table_d *table)
1644 /* Initialize count of number of entries in hash table. */
1645 table->n_elems = 0;
1646 memset (table->table, 0, table->size * sizeof (struct gcse_expr *));
1648 compute_hash_table_work (table);
1651 /* Expression tracking support. */
1653 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1654 static void
1655 clear_modify_mem_tables (void)
1657 unsigned i;
1658 bitmap_iterator bi;
1660 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1662 modify_mem_list[i].release ();
1663 canon_modify_mem_list[i].release ();
1665 bitmap_clear (modify_mem_list_set);
1666 bitmap_clear (blocks_with_calls);
1669 /* Release memory used by modify_mem_list_set. */
1671 static void
1672 free_modify_mem_tables (void)
1674 clear_modify_mem_tables ();
1675 free (modify_mem_list);
1676 free (canon_modify_mem_list);
1677 modify_mem_list = 0;
1678 canon_modify_mem_list = 0;
1681 /* For each block, compute whether X is transparent. X is either an
1682 expression or an assignment [though we don't care which, for this context
1683 an assignment is treated as an expression]. For each block where an
1684 element of X is modified, reset the INDX bit in BMAP. */
1686 static void
1687 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1689 int i, j;
1690 enum rtx_code code;
1691 const char *fmt;
1693 /* repeat is used to turn tail-recursion into iteration since GCC
1694 can't do it when there's no return value. */
1695 repeat:
1697 if (x == 0)
1698 return;
1700 code = GET_CODE (x);
1701 switch (code)
1703 case REG:
1705 df_ref def;
1706 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1707 def;
1708 def = DF_REF_NEXT_REG (def))
1709 bitmap_clear_bit (bmap[DF_REF_BB (def)->index], indx);
1712 return;
1714 case MEM:
1715 if (! MEM_READONLY_P (x))
1717 bitmap_iterator bi;
1718 unsigned bb_index;
1719 rtx x_addr;
1721 x_addr = get_addr (XEXP (x, 0));
1722 x_addr = canon_rtx (x_addr);
1724 /* First handle all the blocks with calls. We don't need to
1725 do any list walking for them. */
1726 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1728 bitmap_clear_bit (bmap[bb_index], indx);
1731 /* Now iterate over the blocks which have memory modifications
1732 but which do not have any calls. */
1733 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1734 blocks_with_calls,
1735 0, bb_index, bi)
1737 vec<modify_pair> list
1738 = canon_modify_mem_list[bb_index];
1739 modify_pair *pair;
1740 unsigned ix;
1742 FOR_EACH_VEC_ELT_REVERSE (list, ix, pair)
1744 rtx dest = pair->dest;
1745 rtx dest_addr = pair->dest_addr;
1747 if (canon_true_dependence (dest, GET_MODE (dest),
1748 dest_addr, x, x_addr))
1750 bitmap_clear_bit (bmap[bb_index], indx);
1751 break;
1757 x = XEXP (x, 0);
1758 goto repeat;
1760 case PC:
1761 case CC0: /*FIXME*/
1762 case CONST:
1763 CASE_CONST_ANY:
1764 case SYMBOL_REF:
1765 case LABEL_REF:
1766 case ADDR_VEC:
1767 case ADDR_DIFF_VEC:
1768 return;
1770 default:
1771 break;
1774 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1776 if (fmt[i] == 'e')
1778 /* If we are about to do the last recursive call
1779 needed at this level, change it into iteration.
1780 This function is called enough to be worth it. */
1781 if (i == 0)
1783 x = XEXP (x, i);
1784 goto repeat;
1787 compute_transp (XEXP (x, i), indx, bmap);
1789 else if (fmt[i] == 'E')
1790 for (j = 0; j < XVECLEN (x, i); j++)
1791 compute_transp (XVECEXP (x, i, j), indx, bmap);
1795 /* Compute PRE+LCM working variables. */
1797 /* Local properties of expressions. */
1799 /* Nonzero for expressions that are transparent in the block. */
1800 static sbitmap *transp;
1802 /* Nonzero for expressions that are computed (available) in the block. */
1803 static sbitmap *comp;
1805 /* Nonzero for expressions that are locally anticipatable in the block. */
1806 static sbitmap *antloc;
1808 /* Nonzero for expressions where this block is an optimal computation
1809 point. */
1810 static sbitmap *pre_optimal;
1812 /* Nonzero for expressions which are redundant in a particular block. */
1813 static sbitmap *pre_redundant;
1815 /* Nonzero for expressions which should be inserted on a specific edge. */
1816 static sbitmap *pre_insert_map;
1818 /* Nonzero for expressions which should be deleted in a specific block. */
1819 static sbitmap *pre_delete_map;
1821 /* Allocate vars used for PRE analysis. */
1823 static void
1824 alloc_pre_mem (int n_blocks, int n_exprs)
1826 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1827 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1828 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1830 pre_optimal = NULL;
1831 pre_redundant = NULL;
1832 pre_insert_map = NULL;
1833 pre_delete_map = NULL;
1834 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1836 /* pre_insert and pre_delete are allocated later. */
1839 /* Free vars used for PRE analysis. */
1841 static void
1842 free_pre_mem (void)
1844 sbitmap_vector_free (transp);
1845 sbitmap_vector_free (comp);
1847 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1849 if (pre_optimal)
1850 sbitmap_vector_free (pre_optimal);
1851 if (pre_redundant)
1852 sbitmap_vector_free (pre_redundant);
1853 if (pre_insert_map)
1854 sbitmap_vector_free (pre_insert_map);
1855 if (pre_delete_map)
1856 sbitmap_vector_free (pre_delete_map);
1858 transp = comp = NULL;
1859 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1862 /* Remove certain expressions from anticipatable and transparent
1863 sets of basic blocks that have an incoming abnormal edge.
1864 For PRE remove potentially trapping expressions to avoid placing
1865 them on abnormal edges. For hoisting remove memory references that
1866 can be clobbered by calls. */
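/* Editorial example (hypothetical RTL): for PRE, an expression like
   (div:SI (reg 65) (reg 66)) may trap on a zero divisor, so it is removed
   from the ANTLOC and TRANSP sets of any block entered by an abnormal
   edge.  For hoisting, a MEM is likewise removed when the abnormal
   predecessor ends in a call, unless its address is a constant-pool
   SYMBOL_REF or it is a read-only, non-volatile, non-trapping reference
   that no call can clobber.  */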
1868 static void
1869 prune_expressions (bool pre_p)
1871 sbitmap prune_exprs;
1872 struct gcse_expr *expr;
1873 unsigned int ui;
1874 basic_block bb;
1876 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1877 bitmap_clear (prune_exprs);
1878 for (ui = 0; ui < expr_hash_table.size; ui++)
1880 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1882 /* Note potentially trapping expressions. */
1883 if (may_trap_p (expr->expr))
1885 bitmap_set_bit (prune_exprs, expr->bitmap_index);
1886 continue;
1889 if (!pre_p && MEM_P (expr->expr))
1890 /* Note memory references that can be clobbered by a call.
1891 We do not split abnormal edges in hoisting, so if a
1892 memory reference were hoisted along an abnormal edge,
1893 it would be placed /before/ the call. Therefore, only
1894 constant memory references can be hoisted along abnormal
1895 edges. */
1897 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1898 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1899 continue;
1901 if (MEM_READONLY_P (expr->expr)
1902 && !MEM_VOLATILE_P (expr->expr)
1903 && MEM_NOTRAP_P (expr->expr))
1904 /* Constant memory reference, e.g., a PIC address. */
1905 continue;
1907 /* ??? Optimally, we would use interprocedural alias
1908 analysis to determine if this mem is actually killed
1909 by this call. */
1911 bitmap_set_bit (prune_exprs, expr->bitmap_index);
1916 FOR_EACH_BB_FN (bb, cfun)
1918 edge e;
1919 edge_iterator ei;
1921 /* If the current block is the destination of an abnormal edge, we
1922 kill all trapping (for PRE) and memory (for hoist) expressions
1923 because we won't be able to properly place the instruction on
1924 the edge. So make them neither anticipatable nor transparent.
1925 This is fairly conservative.
1927 ??? For hoisting it may be necessary to check for set-and-jump
1928 instructions here, not just for abnormal edges. The general problem
1929 is that when an expression cannot be placed right at the end of
1930 a basic block we should account for any side-effects of a subsequent
1931 jump instruction that could clobber the expression. It would
1932 be best to implement this check along the lines of
1933 should_hoist_expr_to_dom where the target block is already known
1934 and, hence, there's no need to conservatively prune expressions on
1935 "intermediate" set-and-jump instructions. */
1936 FOR_EACH_EDGE (e, ei, bb->preds)
1937 if ((e->flags & EDGE_ABNORMAL)
1938 && (pre_p || CALL_P (BB_END (e->src))))
1940 bitmap_and_compl (antloc[bb->index],
1941 antloc[bb->index], prune_exprs);
1942 bitmap_and_compl (transp[bb->index],
1943 transp[bb->index], prune_exprs);
1944 break;
1948 sbitmap_free (prune_exprs);
1951 /* It may be necessary to insert a large number of insns on edges to
1952 make the existing occurrences of expressions fully redundant. This
1953 routine examines the set of insertions and deletions and, if the ratio
1954 of insertions to deletions is too high for a particular expression, then
1955 the expression is removed from the insertion/deletion sets.
1957 N_ELEMS is the number of elements in the hash table. */
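/* Editorial example with made-up counts: suppose expression J would need
   INSERTIONS[J] == 50 copies placed on edges while only DELETIONS[J] == 2
   existing computations would become fully redundant.  The ratio
   50 / 2 == 25 exceeds a MAX_GCSE_INSERTION_RATIO of, say, 20, so bit J
   is cleared from every PRE_INSERT_MAP and PRE_DELETE_MAP entry below.  */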
1959 static void
1960 prune_insertions_deletions (int n_elems)
1962 sbitmap_iterator sbi;
1963 sbitmap prune_exprs;
1965 /* We always use I to iterate over blocks/edges and J to iterate over
1966 expressions. */
1967 unsigned int i, j;
1969 /* Counts for the number of times an expression needs to be inserted and
1970 number of times an expression can be removed as a result. */
1971 int *insertions = GCNEWVEC (int, n_elems);
1972 int *deletions = GCNEWVEC (int, n_elems);
1974 /* Set of expressions which require too many insertions relative to
1975 the number of deletions achieved. We will prune these out of the
1976 insertion/deletion sets. */
1977 prune_exprs = sbitmap_alloc (n_elems);
1978 bitmap_clear (prune_exprs);
1980 /* Iterate over the edges counting the number of times each expression
1981 needs to be inserted. */
1982 for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1984 EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
1985 insertions[j]++;
1988 /* Similarly for deletions, but those occur in blocks rather than on
1989 edges. */
1990 for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1992 EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
1993 deletions[j]++;
1996 /* Now that we have accurate counts, iterate over the elements in the
1997 hash table and see if any need too many insertions relative to the
1998 number of evaluations that can be removed. If so, mark them in
1999 PRUNE_EXPRS. */
2000 for (j = 0; j < (unsigned) n_elems; j++)
2001 if (deletions[j]
2002 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
2003 bitmap_set_bit (prune_exprs, j);
2005 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
2006 EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
2008 for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
2009 bitmap_clear_bit (pre_insert_map[i], j);
2011 for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
2012 bitmap_clear_bit (pre_delete_map[i], j);
2015 sbitmap_free (prune_exprs);
2016 free (insertions);
2017 free (deletions);
2020 /* Top level routine to do the dataflow analysis needed by PRE. */
2022 static struct edge_list *
2023 compute_pre_data (void)
2025 struct edge_list *edge_list;
2026 basic_block bb;
2028 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2029 prune_expressions (true);
2030 bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));
2032 /* Compute ae_kill for each basic block using:
2034 ~(TRANSP | COMP)
2037 FOR_EACH_BB_FN (bb, cfun)
2039 bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
2040 bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
2043 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
2044 ae_kill, &pre_insert_map, &pre_delete_map);
2045 sbitmap_vector_free (antloc);
2046 antloc = NULL;
2047 sbitmap_vector_free (ae_kill);
2048 ae_kill = NULL;
2050 prune_insertions_deletions (expr_hash_table.n_elems);
2052 return edge_list;
2055 /* PRE utilities */
2057 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2058 block BB.
2060 VISITED is a pointer to a working buffer for tracking which BB's have
2061 been visited. It is NULL for the top-level call.
2063 We treat reaching expressions that go through blocks containing the same
2064 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2065 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2066 2 as not reaching. The intent is to improve the probability of finding
2067 only one reaching expression and to reduce register lifetimes by picking
2068 the closest such expression. */
2070 static int
2071 pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
2072 basic_block bb, char *visited)
2074 edge pred;
2075 edge_iterator ei;
2077 FOR_EACH_EDGE (pred, ei, bb->preds)
2079 basic_block pred_bb = pred->src;
2081 if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
2082 /* Has this predecessor already been visited? */
2083 || visited[pred_bb->index])
2084 ;/* Nothing to do. */
2086 /* Does this predecessor generate this expression? */
2087 else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
2089 /* Is this the occurrence we're looking for?
2090 Note that there's only one generating occurrence per block
2091 so we just need to check the block number. */
2092 if (occr_bb == pred_bb)
2093 return 1;
2095 visited[pred_bb->index] = 1;
2097 /* Ignore this predecessor if it kills the expression. */
2098 else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
2099 visited[pred_bb->index] = 1;
2101 /* Neither gen nor kill. */
2102 else
2104 visited[pred_bb->index] = 1;
2105 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2106 return 1;
2110 /* All paths have been checked. */
2111 return 0;
2114 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
2115 memory allocated for that function is returned. */
2117 static int
2118 pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr, basic_block bb)
2120 int rval;
2121 char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
2123 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2125 free (visited);
2126 return rval;
2129 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2131 static rtx_insn *
2132 process_insert_insn (struct gcse_expr *expr)
2134 rtx reg = expr->reaching_reg;
2135 /* Copy the expression to make sure we don't have any sharing issues. */
2136 rtx exp = copy_rtx (expr->expr);
2137 rtx_insn *pat;
2139 start_sequence ();
2141 /* If the expression is something that's an operand, like a constant,
2142 just copy it to a register. */
2143 if (general_operand (exp, GET_MODE (reg)))
2144 emit_move_insn (reg, exp);
2146 /* Otherwise, make a new insn to compute this expression and make sure the
2147 insn will be recognized (this also adds any needed CLOBBERs). */
2148 else
2150 rtx_insn *insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2152 if (insn_invalid_p (insn, false))
2153 gcc_unreachable ();
2156 pat = get_insns ();
2157 end_sequence ();
2159 return pat;
2162 /* Add EXPR to the end of basic block BB.
2164 This is used by both PRE and code hoisting. */
2166 static void
2167 insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
2169 rtx_insn *insn = BB_END (bb);
2170 rtx_insn *new_insn;
2171 rtx reg = expr->reaching_reg;
2172 int regno = REGNO (reg);
2173 rtx_insn *pat, *pat_end;
2175 pat = process_insert_insn (expr);
2176 gcc_assert (pat && INSN_P (pat));
2178 pat_end = pat;
2179 while (NEXT_INSN (pat_end) != NULL_RTX)
2180 pat_end = NEXT_INSN (pat_end);
2182 /* If the last insn is a jump, insert EXPR in front [taking care to
2183 handle cc0, etc. properly]. Similarly we need to take care of trapping
2184 instructions in the presence of non-call exceptions. */
2186 if (JUMP_P (insn)
2187 || (NONJUMP_INSN_P (insn)
2188 && (!single_succ_p (bb)
2189 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2191 #ifdef HAVE_cc0
2192 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2193 if cc0 isn't set. */
2194 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2195 if (note)
2196 insn = safe_as_a <rtx_insn *> (XEXP (note, 0));
2197 else
2199 rtx_insn *maybe_cc0_setter = prev_nonnote_insn (insn);
2200 if (maybe_cc0_setter
2201 && INSN_P (maybe_cc0_setter)
2202 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2203 insn = maybe_cc0_setter;
2205 #endif
2206 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2207 new_insn = emit_insn_before_noloc (pat, insn, bb);
2210 /* Likewise if the last insn is a call, as will happen in the presence
2211 of exception handling. */
2212 else if (CALL_P (insn)
2213 && (!single_succ_p (bb)
2214 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2216 /* Keeping in mind targets with small register classes and parameters
2217 in registers, we search backward and place the instructions before
2218 the first parameter is loaded. Do this for everyone for consistency
2219 and on the presumption that we'll get better code elsewhere as well. */
2221 /* Since different machines initialize their parameter registers
2222 in different orders, assume nothing. Collect the set of all
2223 parameter registers. */
2224 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2226 /* If we found all the parameter loads, then we want to insert
2227 before the first parameter load.
2229 If we did not find all the parameter loads, then we might have
2230 stopped on the head of the block, which could be a CODE_LABEL.
2231 If we inserted before the CODE_LABEL, then we would be putting
2232 the insn in the wrong basic block. In that case, put the insn
2233 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2234 while (LABEL_P (insn)
2235 || NOTE_INSN_BASIC_BLOCK_P (insn))
2236 insn = NEXT_INSN (insn);
2238 new_insn = emit_insn_before_noloc (pat, insn, bb);
2240 else
2241 new_insn = emit_insn_after_noloc (pat, insn, bb);
2243 while (1)
2245 if (INSN_P (pat))
2246 add_label_notes (PATTERN (pat), new_insn);
2247 if (pat == pat_end)
2248 break;
2249 pat = NEXT_INSN (pat);
2252 gcse_create_count++;
2254 if (dump_file)
2256 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2257 bb->index, INSN_UID (new_insn));
2258 fprintf (dump_file, "copying expression %d to reg %d\n",
2259 expr->bitmap_index, regno);
2263 /* Insert partially redundant expressions on edges in the CFG to make
2264 the expressions fully redundant. */
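/* Editorial sketch with hypothetical blocks: suppose expression E is
   computed in block B2 but not in B3, and both B2 and B3 branch to B4,
   which computes E again.  LCM marks the edge B3->B4 in PRE_INSERT_MAP;
   emitting "reaching_reg <- E" on that edge ensures every path into B4
   evaluates E into reaching_reg, so the occurrence in B4 that pre_delete
   turned into a copy from reaching_reg is fully redundant.  */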
2266 static int
2267 pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
2269 int e, i, j, num_edges, set_size, did_insert = 0;
2270 sbitmap *inserted;
2272 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2273 if it reaches any of the deleted expressions. */
2275 set_size = pre_insert_map[0]->size;
2276 num_edges = NUM_EDGES (edge_list);
2277 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2278 bitmap_vector_clear (inserted, num_edges);
2280 for (e = 0; e < num_edges; e++)
2282 int indx;
2283 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2285 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2287 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2289 for (j = indx;
2290 insert && j < (int) expr_hash_table.n_elems;
2291 j++, insert >>= 1)
2292 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2294 struct gcse_expr *expr = index_map[j];
2295 struct gcse_occr *occr;
2297 /* Now look at each deleted occurrence of this expression. */
2298 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2300 if (! occr->deleted_p)
2301 continue;
2303 /* Insert this expression on this edge if it would
2304 reach the deleted occurrence in BB. */
2305 if (!bitmap_bit_p (inserted[e], j))
2307 rtx_insn *insn;
2308 edge eg = INDEX_EDGE (edge_list, e);
2310 /* We can't insert anything on an abnormal and
2311 critical edge, so we insert the insn at the end of
2312 the previous block. There are several alternatives
2313 detailed in Morgan's book P277 (sec 10.5) for
2314 handling this situation. This one is easiest for
2315 now. */
2317 if (eg->flags & EDGE_ABNORMAL)
2318 insert_insn_end_basic_block (index_map[j], bb);
2319 else
2321 insn = process_insert_insn (index_map[j]);
2322 insert_insn_on_edge (insn, eg);
2325 if (dump_file)
2327 fprintf (dump_file, "PRE: edge (%d,%d), ",
2328 bb->index,
2329 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2330 fprintf (dump_file, "copy expression %d\n",
2331 expr->bitmap_index);
2334 update_ld_motion_stores (expr);
2335 bitmap_set_bit (inserted[e], j);
2336 did_insert = 1;
2337 gcse_create_count++;
2344 sbitmap_vector_free (inserted);
2345 return did_insert;
2348 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2349 Given "old_reg <- expr" (INSN), instead of adding after it
2350 reaching_reg <- old_reg
2351 it's better to do the following:
2352 reaching_reg <- expr
2353 old_reg <- reaching_reg
2354 because this way copy propagation can discover additional PRE
2355 opportunities. But if this fails, we try the old way.
2356 When "expr" is a store, i.e.
2357 given "MEM <- old_reg", instead of adding after it
2358 reaching_reg <- old_reg
2359 it's better to add it before as follows:
2360 reaching_reg <- old_reg
2361 MEM <- reaching_reg. */
2363 static void
2364 pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
2366 rtx reg = expr->reaching_reg;
2367 int regno = REGNO (reg);
2368 int indx = expr->bitmap_index;
2369 rtx pat = PATTERN (insn);
2370 rtx set, first_set, new_insn;
2371 rtx old_reg;
2372 int i;
2374 /* This block matches the logic in hash_scan_insn. */
2375 switch (GET_CODE (pat))
2377 case SET:
2378 set = pat;
2379 break;
2381 case PARALLEL:
2382 /* Search through the parallel looking for the set whose
2383 source was the expression that we're interested in. */
2384 first_set = NULL_RTX;
2385 set = NULL_RTX;
2386 for (i = 0; i < XVECLEN (pat, 0); i++)
2388 rtx x = XVECEXP (pat, 0, i);
2389 if (GET_CODE (x) == SET)
2391 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2392 may not find an equivalent expression, but in this
2393 case the PARALLEL will have a single set. */
2394 if (first_set == NULL_RTX)
2395 first_set = x;
2396 if (expr_equiv_p (SET_SRC (x), expr->expr))
2398 set = x;
2399 break;
2404 gcc_assert (first_set);
2405 if (set == NULL_RTX)
2406 set = first_set;
2407 break;
2409 default:
2410 gcc_unreachable ();
2413 if (REG_P (SET_DEST (set)))
2415 old_reg = SET_DEST (set);
2416 /* Check if we can modify the set destination in the original insn. */
2417 if (validate_change (insn, &SET_DEST (set), reg, 0))
2419 new_insn = gen_move_insn (old_reg, reg);
2420 new_insn = emit_insn_after (new_insn, insn);
2422 else
2424 new_insn = gen_move_insn (reg, old_reg);
2425 new_insn = emit_insn_after (new_insn, insn);
2428 else /* This is possible only in case of a store to memory. */
2430 old_reg = SET_SRC (set);
2431 new_insn = gen_move_insn (reg, old_reg);
2433 /* Check if we can modify the set source in the original insn. */
2434 if (validate_change (insn, &SET_SRC (set), reg, 0))
2435 new_insn = emit_insn_before (new_insn, insn);
2436 else
2437 new_insn = emit_insn_after (new_insn, insn);
2440 gcse_create_count++;
2442 if (dump_file)
2443 fprintf (dump_file,
2444 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2445 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2446 INSN_UID (insn), regno);
2449 /* Copy available expressions that reach the redundant expression
2450 to `reaching_reg'. */
2452 static void
2453 pre_insert_copies (void)
2455 unsigned int i, added_copy;
2456 struct gcse_expr *expr;
2457 struct gcse_occr *occr;
2458 struct gcse_occr *avail;
2460 /* For each available expression in the table, copy the result to
2461 `reaching_reg' if the expression reaches a deleted one.
2463 ??? The current algorithm is rather brute force.
2464 Need to do some profiling. */
2466 for (i = 0; i < expr_hash_table.size; i++)
2467 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2469 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2470 we don't want to insert a copy here because the expression may not
2471 really be redundant. So only insert an insn if the expression was
2472 deleted. This test also avoids further processing if the
2473 expression wasn't deleted anywhere. */
2474 if (expr->reaching_reg == NULL)
2475 continue;
2477 /* Set when we add a copy for that expression. */
2478 added_copy = 0;
2480 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2482 if (! occr->deleted_p)
2483 continue;
2485 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2487 rtx_insn *insn = avail->insn;
2489 /* No need to handle this one if handled already. */
2490 if (avail->copied_p)
2491 continue;
2493 /* Don't handle this one if it's a redundant one. */
2494 if (insn->deleted ())
2495 continue;
2497 /* Or if the expression doesn't reach the deleted one. */
2498 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2499 expr,
2500 BLOCK_FOR_INSN (occr->insn)))
2501 continue;
2503 added_copy = 1;
2505 /* Copy the result of avail to reaching_reg. */
2506 pre_insert_copy_insn (expr, insn);
2507 avail->copied_p = 1;
2511 if (added_copy)
2512 update_ld_motion_stores (expr);
2516 struct set_data
2518 rtx_insn *insn;
2519 const_rtx set;
2520 int nsets;
2523 /* Increment number of sets and record set in DATA. */
2525 static void
2526 record_set_data (rtx dest, const_rtx set, void *data)
2528 struct set_data *s = (struct set_data *)data;
2530 if (GET_CODE (set) == SET)
2532 /* We allow insns having multiple sets, where all but one are
2533 dead, as single set insns. In the common case only a single
2534 set is present, so we want to avoid checking for REG_UNUSED
2535 notes unless necessary. */
2536 if (s->nsets == 1
2537 && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
2538 && !side_effects_p (s->set))
2539 s->nsets = 0;
2541 if (!s->nsets)
2543 /* Record this set. */
2544 s->nsets += 1;
2545 s->set = set;
2547 else if (!find_reg_note (s->insn, REG_UNUSED, dest)
2548 || side_effects_p (set))
2549 s->nsets += 1;
2553 static const_rtx
2554 single_set_gcse (rtx_insn *insn)
2556 struct set_data s;
2557 rtx pattern;
2559 gcc_assert (INSN_P (insn));
2561 /* Optimize common case. */
2562 pattern = PATTERN (insn);
2563 if (GET_CODE (pattern) == SET)
2564 return pattern;
2566 s.insn = insn;
2567 s.nsets = 0;
2568 note_stores (pattern, record_set_data, &s);
2570 /* Considered invariant insns have exactly one set. */
2571 gcc_assert (s.nsets == 1);
2572 return s.set;
2575 /* Emit move from SRC to DEST noting the equivalence with expression computed
2576 in INSN. */
2578 static rtx
2579 gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
2581 rtx_insn *new_rtx;
2582 const_rtx set = single_set_gcse (insn);
2583 rtx set2;
2584 rtx note;
2585 rtx eqv = NULL_RTX;
2587 /* This should never fail since we're creating a reg->reg copy
2588 we've verified to be valid. */
2590 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2592 /* Note the equivalence for local CSE pass. Take the note from the old
2593 set if there was one. Otherwise record the SET_SRC from the old set
2594 unless DEST is also an operand of the SET_SRC. */
2595 set2 = single_set (new_rtx);
2596 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2597 return new_rtx;
2598 if ((note = find_reg_equal_equiv_note (insn)))
2599 eqv = XEXP (note, 0);
2600 else if (! REG_P (dest)
2601 || ! reg_mentioned_p (dest, SET_SRC (set)))
2602 eqv = SET_SRC (set);
2604 if (eqv != NULL_RTX)
2605 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2607 return new_rtx;
2610 /* Delete redundant computations.
2611 Deletion is done by changing the insn to copy the `reaching_reg' of
2612 the expression into the result of the SET. It is left to later passes
2613 to propagate the copy or eliminate it.
2615 Return nonzero if a change is made. */
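/* Editorial example (hypothetical pseudos): a redundant
   (set (reg 70) (plus:SI (reg 65) (reg 66))) is in effect rewritten as
   (set (reg 70) (reg REACHING_REG)), where REACHING_REG holds the value
   of the expression computed earlier or inserted on an incoming edge;
   later copy propagation or CSE may remove the copy entirely.  */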
2617 static int
2618 pre_delete (void)
2620 unsigned int i;
2621 int changed;
2622 struct gcse_expr *expr;
2623 struct gcse_occr *occr;
2625 changed = 0;
2626 for (i = 0; i < expr_hash_table.size; i++)
2627 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2629 int indx = expr->bitmap_index;
2631 /* We only need to search antic_occr since we require ANTLOC != 0. */
2632 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2634 rtx_insn *insn = occr->insn;
2635 rtx set;
2636 basic_block bb = BLOCK_FOR_INSN (insn);
2638 /* We only delete insns that have a single_set. */
2639 if (bitmap_bit_p (pre_delete_map[bb->index], indx)
2640 && (set = single_set (insn)) != 0
2641 && dbg_cnt (pre_insn))
2643 /* Create a pseudo-reg to store the result of reaching
2644 expressions into. Get the mode for the new pseudo from
2645 the mode of the original destination pseudo. */
2646 if (expr->reaching_reg == NULL)
2647 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2649 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2650 delete_insn (insn);
2651 occr->deleted_p = 1;
2652 changed = 1;
2653 gcse_subst_count++;
2655 if (dump_file)
2657 fprintf (dump_file,
2658 "PRE: redundant insn %d (expression %d) in ",
2659 INSN_UID (insn), indx);
2660 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2661 bb->index, REGNO (expr->reaching_reg));
2667 return changed;
2670 /* Perform GCSE optimizations using PRE.
2671 This is called by one_pre_gcse_pass after all the dataflow analysis
2672 has been done.
2674 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
2675 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2676 Compiler Design and Implementation.
2678 ??? A new pseudo reg is created to hold the reaching expression. The nice
2679 thing about the classical approach is that it would try to use an existing
2680 reg. If the register can't be adequately optimized [i.e. we introduce
2681 reload problems], one could add a pass here to propagate the new register
2682 through the block.
2684 ??? We don't handle single sets in PARALLELs because we're [currently] not
2685 able to copy the rest of the parallel when we insert copies to create full
2686 redundancies from partial redundancies. However, there's no reason why we
2687 can't handle PARALLELs in the cases where there are no partial
2688 redundancies. */
2690 static int
2691 pre_gcse (struct edge_list *edge_list)
2693 unsigned int i;
2694 int did_insert, changed;
2695 struct gcse_expr **index_map;
2696 struct gcse_expr *expr;
2698 /* Compute a mapping from expression number (`bitmap_index') to
2699 hash table entry. */
2701 index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
2702 for (i = 0; i < expr_hash_table.size; i++)
2703 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2704 index_map[expr->bitmap_index] = expr;
2706 /* Delete the redundant insns first so that
2707 - we know what register to use for the new insns and for the other
2708 ones with reaching expressions
2709 - we know which insns are redundant when we go to create copies */
2711 changed = pre_delete ();
2712 did_insert = pre_edge_insert (edge_list, index_map);
2714 /* In other places with reaching expressions, copy the expression to the
2715 specially allocated pseudo-reg that reaches the redundant expr. */
2716 pre_insert_copies ();
2717 if (did_insert)
2719 commit_edge_insertions ();
2720 changed = 1;
2723 free (index_map);
2724 return changed;
2727 /* Top level routine to perform one PRE GCSE pass.
2729 Return nonzero if a change was made. */
2731 static int
2732 one_pre_gcse_pass (void)
2734 int changed = 0;
2736 gcse_subst_count = 0;
2737 gcse_create_count = 0;
2739 /* Return if there's nothing to do, or it is too expensive. */
2740 if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
2741 || is_too_expensive (_("PRE disabled")))
2742 return 0;
2744 /* We need alias. */
2745 init_alias_analysis ();
2747 bytes_used = 0;
2748 gcc_obstack_init (&gcse_obstack);
2749 alloc_gcse_mem ();
2751 alloc_hash_table (&expr_hash_table);
2752 add_noreturn_fake_exit_edges ();
2753 if (flag_gcse_lm)
2754 compute_ld_motion_mems ();
2756 compute_hash_table (&expr_hash_table);
2757 if (flag_gcse_lm)
2758 trim_ld_motion_mems ();
2759 if (dump_file)
2760 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2762 if (expr_hash_table.n_elems > 0)
2764 struct edge_list *edge_list;
2765 alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
2766 edge_list = compute_pre_data ();
2767 changed |= pre_gcse (edge_list);
2768 free_edge_list (edge_list);
2769 free_pre_mem ();
2772 if (flag_gcse_lm)
2773 free_ld_motion_mems ();
2774 remove_fake_exit_edges ();
2775 free_hash_table (&expr_hash_table);
2777 free_gcse_mem ();
2778 obstack_free (&gcse_obstack, NULL);
2780 /* We are finished with alias. */
2781 end_alias_analysis ();
2783 if (dump_file)
2785 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2786 current_function_name (), n_basic_blocks_for_fn (cfun),
2787 bytes_used);
2788 fprintf (dump_file, "%d substs, %d insns created\n",
2789 gcse_subst_count, gcse_create_count);
2792 return changed;
2795 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2796 to INSN. If such notes are added to an insn which references a
2797 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2798 these notes, because the following loop optimization pass requires
2799 them. */
2801 /* ??? If there was a jump optimization pass after gcse and before loop,
2802 then we would not need to do this here, because jump would add the
2803 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2805 static void
2806 add_label_notes (rtx x, rtx insn)
2808 enum rtx_code code = GET_CODE (x);
2809 int i, j;
2810 const char *fmt;
2812 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2814 /* This code used to ignore labels that referred to dispatch tables to
2815 avoid flow generating (slightly) worse code.
2817 We no longer ignore such label references (see LABEL_REF handling in
2818 mark_jump_label for additional information). */
2820 /* There's no reason for current users to emit jump-insns with
2821 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2822 notes. */
2823 gcc_assert (!JUMP_P (insn));
2824 add_reg_note (insn, REG_LABEL_OPERAND, LABEL_REF_LABEL (x));
2826 if (LABEL_P (LABEL_REF_LABEL (x)))
2827 LABEL_NUSES (LABEL_REF_LABEL (x))++;
2829 return;
2832 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2834 if (fmt[i] == 'e')
2835 add_label_notes (XEXP (x, i), insn);
2836 else if (fmt[i] == 'E')
2837 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2838 add_label_notes (XVECEXP (x, i, j), insn);
2842 /* Code Hoisting variables and subroutines. */
2844 /* Very busy expressions. */
2845 static sbitmap *hoist_vbein;
2846 static sbitmap *hoist_vbeout;
2848 /* ??? We could compute post dominators and run this algorithm in
2849 reverse to perform tail merging; doing so would probably be
2850 more effective than the tail merging code in jump.c.
2852 It's unclear if tail merging could be run in parallel with
2853 code hoisting. It would be nice. */
2855 /* Allocate vars used for code hoisting analysis. */
2857 static void
2858 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2860 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2861 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2862 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2864 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2865 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2868 /* Free vars used for code hoisting analysis. */
2870 static void
2871 free_code_hoist_mem (void)
2873 sbitmap_vector_free (antloc);
2874 sbitmap_vector_free (transp);
2875 sbitmap_vector_free (comp);
2877 sbitmap_vector_free (hoist_vbein);
2878 sbitmap_vector_free (hoist_vbeout);
2880 free_dominance_info (CDI_DOMINATORS);
2883 /* Compute the very busy expressions at entry/exit from each block.
2885 An expression is very busy if all paths from a given point
2886 compute the expression. */
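/* Editorial summary of the equations iterated below to a fixed point:
     VBEOUT[bb] = (intersection of VBEIN[s] over successors s of bb)
                  | COMP[bb]
     VBEIN[bb]  = ANTLOC[bb] | (VBEOUT[bb] & TRANSP[bb])
   except that the block laid out immediately before the exit block keeps
   VBEOUT empty.  */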
2888 static void
2889 compute_code_hoist_vbeinout (void)
2891 int changed, passes;
2892 basic_block bb;
2894 bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
2895 bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));
2897 passes = 0;
2898 changed = 1;
2900 while (changed)
2902 changed = 0;
2904 /* We scan the blocks in the reverse order to speed up
2905 the convergence. */
2906 FOR_EACH_BB_REVERSE_FN (bb, cfun)
2908 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2910 bitmap_intersection_of_succs (hoist_vbeout[bb->index],
2911 hoist_vbein, bb);
2913 /* Include expressions in VBEout that are calculated
2914 in BB and available at its end. */
2915 bitmap_ior (hoist_vbeout[bb->index],
2916 hoist_vbeout[bb->index], comp[bb->index]);
2919 changed |= bitmap_or_and (hoist_vbein[bb->index],
2920 antloc[bb->index],
2921 hoist_vbeout[bb->index],
2922 transp[bb->index]);
2925 passes++;
2928 if (dump_file)
2930 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2932 FOR_EACH_BB_FN (bb, cfun)
2934 fprintf (dump_file, "vbein (%d): ", bb->index);
2935 dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
2936 fprintf (dump_file, "vbeout(%d): ", bb->index);
2937 dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
2942 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2944 static void
2945 compute_code_hoist_data (void)
2947 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2948 prune_expressions (false);
2949 compute_code_hoist_vbeinout ();
2950 calculate_dominance_info (CDI_DOMINATORS);
2951 if (dump_file)
2952 fprintf (dump_file, "\n");
2955 /* Update register pressure for BB when hoisting an expression from
2956 instruction FROM, if live ranges of inputs are shrunk. Also
2957 maintain live_in information if the live range of a register referred
2958 to in FROM is shrunk.
2960 Return 0 if register pressure doesn't change, otherwise return
2961 the number by which register pressure is decreased.
2963 NOTE: Register pressure won't be increased in this function. */
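/* Editorial example (hypothetical pseudos): if FROM is
   (set (reg 70) (plus:SI (reg 65) (reg 66))) and reg 65 is neither live
   into any successor of BB nor used by any other insn in BB, hoisting
   FROM out of BB ends reg 65's live range there: reg 65 is dropped from
   BB's live_in set and BB's maximum pressure for reg 65's pressure class
   is reduced by the number of hard registers it needs.  */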
2965 static int
2966 update_bb_reg_pressure (basic_block bb, rtx_insn *from)
2968 rtx dreg;
2969 rtx_insn *insn;
2970 basic_block succ_bb;
2971 df_ref use, op_ref;
2972 edge succ;
2973 edge_iterator ei;
2974 int decreased_pressure = 0;
2975 int nregs;
2976 enum reg_class pressure_class;
2978 FOR_EACH_INSN_USE (use, from)
2980 dreg = DF_REF_REAL_REG (use);
2981 /* The live range of the register is shrunk only if it isn't:
2982 1. referred to on any path from the end of this block to EXIT, or
2983 2. referred to by insns other than FROM in this block. */
2984 FOR_EACH_EDGE (succ, ei, bb->succs)
2986 succ_bb = succ->dest;
2987 if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2988 continue;
2990 if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
2991 break;
2993 if (succ != NULL)
2994 continue;
2996 op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
2997 for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
2999 if (!DF_REF_INSN_INFO (op_ref))
3000 continue;
3002 insn = DF_REF_INSN (op_ref);
3003 if (BLOCK_FOR_INSN (insn) == bb
3004 && NONDEBUG_INSN_P (insn) && insn != from)
3005 break;
3008 pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
3009 /* Decrease register pressure and update live_in information for
3010 this block. */
3011 if (!op_ref && pressure_class != NO_REGS)
3013 decreased_pressure += nregs;
3014 BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
3015 bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
3018 return decreased_pressure;
3021 /* Determine if the expression EXPR should be hoisted to EXPR_BB up in
3022 the flow graph, if it can reach BB unimpaired. Stop the search if the
3023 expression would need to be moved more than DISTANCE instructions.
3025 DISTANCE is the number of instructions through which EXPR can be
3026 hoisted up in the flow graph.
3028 BB_SIZE points to an array which contains the number of instructions
3029 for each basic block.
3031 PRESSURE_CLASS and NREGS are register class and number of hard registers
3032 for storing EXPR.
3034 HOISTED_BBS points to a bitmap indicating basic blocks through which
3035 EXPR is hoisted.
3037 FROM is the instruction from which EXPR is hoisted.
3039 It's unclear exactly what Muchnick meant by "unimpaired". It seems
3040 to me that the expression must either be computed or transparent in
3041 *every* block in the path(s) from EXPR_BB to BB. Any other definition
3042 would allow the expression to be hoisted out of loops, even if
3043 the expression wasn't a loop invariant.
3045 Contrast this to reachability for PRE where an expression is
3046 considered reachable if *any* path reaches instead of *all*
3047 paths. */
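/* Editorial example: if the dominated block BB lies inside a loop whose
   body assigns to reg 65 (a hypothetical pseudo), an expression in BB
   that uses reg 65 is not transparent in the loop's other blocks, so the
   backward walk over predecessors below stops and the expression is not
   hoisted to a dominator outside the loop; this is exactly the
   non-invariant case the comment above warns about.  */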
3049 static int
3050 should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
3051 basic_block bb, sbitmap visited, int distance,
3052 int *bb_size, enum reg_class pressure_class,
3053 int *nregs, bitmap hoisted_bbs, rtx_insn *from)
3055 unsigned int i;
3056 edge pred;
3057 edge_iterator ei;
3058 sbitmap_iterator sbi;
3059 int visited_allocated_locally = 0;
3060 int decreased_pressure = 0;
3062 if (flag_ira_hoist_pressure)
3064 /* Record old information of basic block BB when it is visited
3065 for the first time. */
3066 if (!bitmap_bit_p (hoisted_bbs, bb->index))
3068 struct bb_data *data = BB_DATA (bb);
3069 bitmap_copy (data->backup, data->live_in);
3070 data->old_pressure = data->max_reg_pressure[pressure_class];
3072 decreased_pressure = update_bb_reg_pressure (bb, from);
3074 /* Terminate the search if the distance over which EXPR is allowed to move
3075 is exhausted. */
3076 if (distance > 0)
3078 if (flag_ira_hoist_pressure)
3080 /* Prefer to hoist EXPR if register pressure is decreased. */
3081 if (decreased_pressure > *nregs)
3082 distance += bb_size[bb->index];
3083 /* Let EXPR be hoisted through the basic block at no cost if one
3084 of the following conditions is satisfied:
3086 1. The basic block has low register pressure.
3087 2. Register pressure won't be increased after hoisting EXPR.
3089 Constant expressions are handled conservatively, because
3090 hoisting constant expressions aggressively results in worse
3091 code. This decision is based on observations of CSiBE on the
3092 ARM target, while it has no obvious effect on other
3093 targets like x86, x86_64, mips and powerpc. */
3094 else if (CONST_INT_P (expr->expr)
3095 || (BB_DATA (bb)->max_reg_pressure[pressure_class]
3096 >= ira_class_hard_regs_num[pressure_class]
3097 && decreased_pressure < *nregs))
3098 distance -= bb_size[bb->index];
3100 else
3101 distance -= bb_size[bb->index];
3103 if (distance <= 0)
3104 return 0;
3106 else
3107 gcc_assert (distance == 0);
3109 if (visited == NULL)
3111 visited_allocated_locally = 1;
3112 visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
3113 bitmap_clear (visited);
3116 FOR_EACH_EDGE (pred, ei, bb->preds)
3118 basic_block pred_bb = pred->src;
3120 if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3121 break;
3122 else if (pred_bb == expr_bb)
3123 continue;
3124 else if (bitmap_bit_p (visited, pred_bb->index))
3125 continue;
3126 else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
3127 break;
3128 /* Not killed. */
3129 else
3131 bitmap_set_bit (visited, pred_bb->index);
3132 if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
3133 visited, distance, bb_size,
3134 pressure_class, nregs,
3135 hoisted_bbs, from))
3136 break;
3139 if (visited_allocated_locally)
3141 /* If EXPR can be hoisted to expr_bb, record basic blocks through
3142 which EXPR is hoisted in hoisted_bbs. */
3143 if (flag_ira_hoist_pressure && !pred)
3145 /* Record the basic block from which EXPR is hoisted. */
3146 bitmap_set_bit (visited, bb->index);
3147 EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
3148 bitmap_set_bit (hoisted_bbs, i);
3150 sbitmap_free (visited);
3153 return (pred == NULL);
3156 /* Find occurrence in BB. */
3158 static struct gcse_occr *
3159 find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
3161 /* Find the right occurrence of this expression. */
3162 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3163 occr = occr->next;
3165 return occr;
3168 /* Actually perform code hoisting.
3170 The code hoisting pass can hoist multiple computations of the same
3171 expression along dominated paths to a dominating basic block, like
3172 from b2/b3 to b1 as depicted below:
3174 b1 ------
3175 /\ |
3176 / \ |
3177 bx by distance
3178 / \ |
3179 / \ |
3180 b2 b3 ------
3182 Unfortunately code hoisting generally extends the live range of an
3183 output pseudo register, which increases register pressure and hurts
3184 register allocation. To address this issue, an attribute MAX_DISTANCE
3185 is computed and attached to each expression. The attribute is computed
3186 from the rtx cost of the corresponding expression and is used to control
3187 how far the expression can be hoisted up in the flow graph. As the
3188 expression is hoisted up in flow graph, GCC decreases its DISTANCE
3189 and stops the hoist if DISTANCE reaches 0. Code hoisting can decrease
3190 register pressure if live ranges of inputs are shrunk.
3192 Option "-fira-hoist-pressure" implements register pressure directed
3193 hoist based on upper method. The rationale is:
3194 1. Calculate register pressure for each basic block by reusing IRA
3195 facility.
3196 2. When an expression is hoisted through one basic block, GCC checks
3197 the change of live ranges for inputs/output. The basic block's
3198 register pressure will be increased because of extended live
3199 range of output. However, register pressure will be decreased
3200 if the live ranges of inputs are shrunk.
3201 3. After knowing how hoisting affects register pressure, GCC prefers
3202 to hoist the expression if it can decrease register pressure, by
3203 increasing DISTANCE of the corresponding expression.
3204 4. If hoisting the expression increases register pressure, GCC checks
3205 register pressure of the basic block and decreases DISTANCE only if
3206 the register pressure is high. In other words, the expression will be
3207 hoisted through at no cost if the basic block has low register
3208 pressure.
3209 5. Update register pressure information for basic blocks through
3210 which expression is hoisted. */
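/* Editorial walk-through with made-up numbers: if EXPR's MAX_DISTANCE is
   40 and it must pass through a block of 25 non-debug insns, then without
   -fira-hoist-pressure the remaining distance becomes 40 - 25 = 15.  With
   -fira-hoist-pressure, the distance is instead increased by 25 when
   hoisting through the block shrinks input live ranges by more than
   EXPR's own register need; it is decreased by 25 for constant
   expressions, or when the block's maximum pressure for EXPR's pressure
   class already meets ira_class_hard_regs_num and the shrinkage is
   insufficient; otherwise the block is passed through at no cost.  */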
3212 static int
3213 hoist_code (void)
3215 basic_block bb, dominated;
3216 vec<basic_block> dom_tree_walk;
3217 unsigned int dom_tree_walk_index;
3218 vec<basic_block> domby;
3219 unsigned int i, j, k;
3220 struct gcse_expr **index_map;
3221 struct gcse_expr *expr;
3222 int *to_bb_head;
3223 int *bb_size;
3224 int changed = 0;
3225 struct bb_data *data;
3226 /* Basic blocks that have occurrences reachable from BB. */
3227 bitmap from_bbs;
3228 /* Basic blocks through which expr is hoisted. */
3229 bitmap hoisted_bbs = NULL;
3230 bitmap_iterator bi;
3232 /* Compute a mapping from expression number (`bitmap_index') to
3233 hash table entry. */
3235 index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
3236 for (i = 0; i < expr_hash_table.size; i++)
3237 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3238 index_map[expr->bitmap_index] = expr;
3240 /* Calculate sizes of basic blocks and note how far
3241 each instruction is from the start of its block. We then use this
3242 data to restrict the distance an expression can travel. */
3244 to_bb_head = XCNEWVEC (int, get_max_uid ());
3245 bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));
3247 FOR_EACH_BB_FN (bb, cfun)
3249 rtx_insn *insn;
3250 int to_head;
3252 to_head = 0;
3253 FOR_BB_INSNS (bb, insn)
3255 /* Don't count debug instructions to avoid them affecting
3256 decision choices. */
3257 if (NONDEBUG_INSN_P (insn))
3258 to_bb_head[INSN_UID (insn)] = to_head++;
3261 bb_size[bb->index] = to_head;
3264 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
3265 && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
3266 == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));
3268 from_bbs = BITMAP_ALLOC (NULL);
3269 if (flag_ira_hoist_pressure)
3270 hoisted_bbs = BITMAP_ALLOC (NULL);
3272 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
3273 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
3275 /* Walk over each basic block looking for potentially hoistable
3276 expressions; nothing gets hoisted from the entry block. */
3277 FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3279 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
3281 if (domby.length () == 0)
3282 continue;
3284 /* Examine each expression that is very busy at the exit of this
3285 block. These are the potentially hoistable expressions. */
3286 for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3288 if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3290 int nregs = 0;
3291 enum reg_class pressure_class = NO_REGS;
3292 /* Current expression. */
3293 struct gcse_expr *expr = index_map[i];
3294 /* Number of occurrences of EXPR that can be hoisted to BB. */
3295 int hoistable = 0;
3296 /* Occurrences reachable from BB. */
3297 vec<occr_t> occrs_to_hoist = vNULL;
3298 /* We want to insert the expression into BB only once, so
3299 note when we've inserted it. */
3300 int insn_inserted_p;
3301 occr_t occr;
3303 /* If an expression is computed in BB and is available at end of
3304 BB, hoist all occurrences dominated by BB to BB. */
3305 if (bitmap_bit_p (comp[bb->index], i))
3307 occr = find_occr_in_bb (expr->antic_occr, bb);
3309 if (occr)
3311 /* An occurrence might've been already deleted
3312 while processing a dominator of BB. */
3313 if (!occr->deleted_p)
3315 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3316 hoistable++;
3319 else
3320 hoistable++;
3323 /* We've found a potentially hoistable expression, now
3324 we look at every block BB dominates to see if it
3325 computes the expression. */
3326 FOR_EACH_VEC_ELT (domby, j, dominated)
3328 int max_distance;
3330 /* Ignore self dominance. */
3331 if (bb == dominated)
3332 continue;
3333 /* We've found a dominated block, now see if it computes
3334 the busy expression and whether or not moving that
3335 expression to the "beginning" of that block is safe. */
3336 if (!bitmap_bit_p (antloc[dominated->index], i))
3337 continue;
3339 occr = find_occr_in_bb (expr->antic_occr, dominated);
3340 gcc_assert (occr);
3342 /* An occurrence might've been already deleted
3343 while processing a dominator of BB. */
3344 if (occr->deleted_p)
3345 continue;
3346 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3348 max_distance = expr->max_distance;
3349 if (max_distance > 0)
3350 /* Adjust MAX_DISTANCE to account for the fact that
3351 OCCR won't have to travel all of DOMINATED, but
3352 only part of it. */
3353 max_distance += (bb_size[dominated->index]
3354 - to_bb_head[INSN_UID (occr->insn)]);
3356 pressure_class = get_pressure_class_and_nregs (occr->insn,
3357 &nregs);
3359 /* Note if the expression should be hoisted from the dominated
3360 block to BB if it can reach DOMINATED unimpaired.
3362 Keep track of how many times this expression is hoistable
3363 from a dominated block into BB. */
3364 if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3365 max_distance, bb_size,
3366 pressure_class, &nregs,
3367 hoisted_bbs, occr->insn))
3369 hoistable++;
3370 occrs_to_hoist.safe_push (occr);
3371 bitmap_set_bit (from_bbs, dominated->index);
3375 /* If we found more than one hoistable occurrence of this
3376 expression, then note it in the vector of expressions to
3377 hoist. It makes no sense to hoist things which are computed
3378 in only one BB, and doing so tends to pessimize register
3379 allocation. One could increase this value to try harder
3380 to avoid any possible code expansion due to register
3381 allocation issues; however experiments have shown that
3382 the vast majority of hoistable expressions are only movable
3383 from two successors, so raising this threshold is likely
3384 to nullify any benefit we get from code hoisting. */
3385 if (hoistable > 1 && dbg_cnt (hoist_insn))
3387 /* If (hoistable != vec::length), then there is
3388 an occurrence of EXPR in BB itself. Don't waste
3389 time looking for LCA in this case. */
3390 if ((unsigned) hoistable == occrs_to_hoist.length ())
3392 basic_block lca;
3394 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3395 from_bbs);
3396 if (lca != bb)
3397 /* Punt, it's better to hoist these occurrences to
3398 LCA. */
3399 occrs_to_hoist.release ();
3402 else
3403 /* Punt, no point hoisting a single occurrence. */
3404 occrs_to_hoist.release ();
3406 if (flag_ira_hoist_pressure
3407 && !occrs_to_hoist.is_empty ())
3409 /* Increase register pressure of basic blocks to which
3410 expr is hoisted because of extended live range of
3411 output. */
3412 data = BB_DATA (bb);
3413 data->max_reg_pressure[pressure_class] += nregs;
3414 EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3416 data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3417 data->max_reg_pressure[pressure_class] += nregs;
3420 else if (flag_ira_hoist_pressure)
3422 /* Restore register pressure and live_in info for basic
3423 blocks recorded in hoisted_bbs when expr will not be
3424 hoisted. */
3425 EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3427 data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3428 bitmap_copy (data->live_in, data->backup);
3429 data->max_reg_pressure[pressure_class]
3430 = data->old_pressure;
3434 if (flag_ira_hoist_pressure)
3435 bitmap_clear (hoisted_bbs);
3437 insn_inserted_p = 0;
3439 /* Walk through occurrences of the I'th expression we want
3440 to hoist to BB and make the transformations. */
3441 FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3443 rtx_insn *insn;
3444 const_rtx set;
3446 gcc_assert (!occr->deleted_p);
3448 insn = occr->insn;
3449 set = single_set_gcse (insn);
3451 /* Create a pseudo-reg to store the result of reaching
3452 expressions into. Get the mode for the new pseudo
3453 from the mode of the original destination pseudo.
3455 It is important to use new pseudos whenever we
3456 emit a set. This will allow reload to use
3457 rematerialization for such registers. */
3458 if (!insn_inserted_p)
3459 expr->reaching_reg
3460 = gen_reg_rtx_and_attrs (SET_DEST (set));
3462 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3463 insn);
3464 delete_insn (insn);
3465 occr->deleted_p = 1;
3466 changed = 1;
3467 gcse_subst_count++;
3469 if (!insn_inserted_p)
3471 insert_insn_end_basic_block (expr, bb);
3472 insn_inserted_p = 1;
3476 occrs_to_hoist.release ();
3477 bitmap_clear (from_bbs);
3480 domby.release ();
3483 dom_tree_walk.release ();
3484 BITMAP_FREE (from_bbs);
3485 if (flag_ira_hoist_pressure)
3486 BITMAP_FREE (hoisted_bbs);
3488 free (bb_size);
3489 free (to_bb_head);
3490 free (index_map);
3492 return changed;
3495 /* Return pressure class and number of needed hard registers (through
3496 *NREGS) of register REGNO. */
3497 static enum reg_class
3498 get_regno_pressure_class (int regno, int *nregs)
3500 if (regno >= FIRST_PSEUDO_REGISTER)
3502 enum reg_class pressure_class;
3504 pressure_class = reg_allocno_class (regno);
3505 pressure_class = ira_pressure_class_translate[pressure_class];
3506 *nregs
3507 = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3508 return pressure_class;
3510 else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3511 && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3513 *nregs = 1;
3514 return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3516 else
3518 *nregs = 0;
3519 return NO_REGS;
3523 /* Return pressure class and number of hard registers (through *NREGS)
3524 for destination of INSN. */
3525 static enum reg_class
3526 get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
3528 rtx reg;
3529 enum reg_class pressure_class;
3530 const_rtx set = single_set_gcse (insn);
3532 reg = SET_DEST (set);
3533 if (GET_CODE (reg) == SUBREG)
3534 reg = SUBREG_REG (reg);
3535 if (MEM_P (reg))
3537 *nregs = 0;
3538 pressure_class = NO_REGS;
3540 else
3542 gcc_assert (REG_P (reg));
3543 pressure_class = reg_allocno_class (REGNO (reg));
3544 pressure_class = ira_pressure_class_translate[pressure_class];
3545 *nregs
3546 = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3548 return pressure_class;
3551 /* Increase (if INCR_P) or decrease current register pressure for
3552 register REGNO. */
3553 static void
3554 change_pressure (int regno, bool incr_p)
3556 int nregs;
3557 enum reg_class pressure_class;
3559 pressure_class = get_regno_pressure_class (regno, &nregs);
3560 if (! incr_p)
3561 curr_reg_pressure[pressure_class] -= nregs;
3562 else
3564 curr_reg_pressure[pressure_class] += nregs;
3565 if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3566 < curr_reg_pressure[pressure_class])
3567 BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3568 = curr_reg_pressure[pressure_class];
3572 /* Calculate register pressure for each basic block by walking insns
3573 from last to first. */
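/* Editorial sketch of the walk below: each block starts with the
   registers in its DF live-out set, then insns are scanned from last to
   first.  A non-partial, unconditional definition removes its register
   from the working live set, decreasing pressure for its class; a use
   adds its register, increasing pressure; the per-class maximum seen
   anywhere in the block is recorded in BB_DATA (bb)->max_reg_pressure.  */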
3574 static void
3575 calculate_bb_reg_pressure (void)
3577 int i;
3578 unsigned int j;
3579 rtx_insn *insn;
3580 basic_block bb;
3581 bitmap curr_regs_live;
3582 bitmap_iterator bi;
3585 ira_setup_eliminable_regset ();
3586 curr_regs_live = BITMAP_ALLOC (&reg_obstack);
3587 FOR_EACH_BB_FN (bb, cfun)
3589 curr_bb = bb;
3590 BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3591 BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3592 bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3593 bitmap_copy (curr_regs_live, df_get_live_out (bb));
3594 for (i = 0; i < ira_pressure_classes_num; i++)
3595 curr_reg_pressure[ira_pressure_classes[i]] = 0;
3596 EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3597 change_pressure (j, true);
3599 FOR_BB_INSNS_REVERSE (bb, insn)
3601 rtx dreg;
3602 int regno;
3603 df_ref def, use;
3605 if (! NONDEBUG_INSN_P (insn))
3606 continue;
3608 FOR_EACH_INSN_DEF (def, insn)
3610 dreg = DF_REF_REAL_REG (def);
3611 gcc_assert (REG_P (dreg));
3612 regno = REGNO (dreg);
3613 if (!(DF_REF_FLAGS (def)
3614 & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3616 if (bitmap_clear_bit (curr_regs_live, regno))
3617 change_pressure (regno, false);
3621 FOR_EACH_INSN_USE (use, insn)
3623 dreg = DF_REF_REAL_REG (use);
3624 gcc_assert (REG_P (dreg));
3625 regno = REGNO (dreg);
3626 if (bitmap_set_bit (curr_regs_live, regno))
3627 change_pressure (regno, true);
3631 BITMAP_FREE (curr_regs_live);
3633 if (dump_file == NULL)
3634 return;
3636 fprintf (dump_file, "\nRegister Pressure: \n");
3637 FOR_EACH_BB_FN (bb, cfun)
3639 fprintf (dump_file, " Basic block %d: \n", bb->index);
3640 for (i = 0; (int) i < ira_pressure_classes_num; i++)
3642 enum reg_class pressure_class;
3644 pressure_class = ira_pressure_classes[i];
3645 if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3646 continue;
3648 fprintf (dump_file, " %s=%d\n", reg_class_names[pressure_class],
3649 BB_DATA (bb)->max_reg_pressure[pressure_class]);
3652 fprintf (dump_file, "\n");
3655 /* Top level routine to perform one code hoisting (aka unification) pass.
3657 Return nonzero if a change was made. */
3659 static int
3660 one_code_hoisting_pass (void)
3662 int changed = 0;
3664 gcse_subst_count = 0;
3665 gcse_create_count = 0;
3667 /* Return if there's nothing to do, or it is too expensive. */
3668 if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
3669 || is_too_expensive (_("GCSE disabled")))
3670 return 0;
3672 doing_code_hoisting_p = true;
3674 /* Calculate register pressure for each basic block. */
3675 if (flag_ira_hoist_pressure)
3677 regstat_init_n_sets_and_refs ();
3678 ira_set_pseudo_classes (false, dump_file);
3679 alloc_aux_for_blocks (sizeof (struct bb_data));
3680 calculate_bb_reg_pressure ();
3681 regstat_free_n_sets_and_refs ();
3684 /* We need alias. */
3685 init_alias_analysis ();
3687 bytes_used = 0;
3688 gcc_obstack_init (&gcse_obstack);
3689 alloc_gcse_mem ();
3691 alloc_hash_table (&expr_hash_table);
3692 compute_hash_table (&expr_hash_table);
3693 if (dump_file)
3694 dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
3696 if (expr_hash_table.n_elems > 0)
3698 alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
3699 expr_hash_table.n_elems);
3700 compute_code_hoist_data ();
3701 changed = hoist_code ();
3702 free_code_hoist_mem ();
3705 if (flag_ira_hoist_pressure)
3707 free_aux_for_blocks ();
3708 free_reg_info ();
3710 free_hash_table (&expr_hash_table);
3711 free_gcse_mem ();
3712 obstack_free (&gcse_obstack, NULL);
3714 /* We are finished with alias analysis. */
3715 end_alias_analysis ();
3717 if (dump_file)
3719 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3720 current_function_name (), n_basic_blocks_for_fn (cfun),
3721 bytes_used);
3722 fprintf (dump_file, "%d substs, %d insns created\n",
3723 gcse_subst_count, gcse_create_count);
3726 doing_code_hoisting_p = false;
3728 return changed;
3731 /* Here we provide the things required to do store motion towards the exit.
3732 In order for this to be effective, gcse also needed to be taught how to
3733 move a load when it is killed only by a store to itself.
3735 int i;
3736 float a[10];
3738 void foo(float scale)
3740 for (i=0; i<10; i++)
3741 a[i] *= scale;
3744 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3745 the load out since it's live around the loop, and stored at the bottom
3746 of the loop.
3748 The 'Load Motion' referred to and implemented in this file is
3749 an enhancement to gcse which, when using edge-based LCM, recognizes
3750 this situation and allows gcse to move the load out of the loop.
3752 Once gcse has hoisted the load, store motion can then push this
3753 load towards the exit, and we end up with no loads or stores of 'i'
3754 in the loop. */
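/* As a rough sketch of the combined effect (illustrative only; i_tmp is a
   made-up name for the reaching register), the loop above ends up behaving
   like

     void foo (float scale)
     {
       int i_tmp;
       for (i_tmp = 0; i_tmp < 10; i_tmp++)   <- 'i' lives in a register
         a[i_tmp] *= scale;
       i = i_tmp;                              <- one store pushed to the exit
     }

   with no loads or stores of 'i' left inside the loop body.  */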
3756 /* This will search the ldst list for a matching expression. If it
3757 doesn't find one, we create one and initialize it. */
3759 static struct ls_expr *
3760 ldst_entry (rtx x)
3762 int do_not_record_p = 0;
3763 struct ls_expr * ptr;
3764 unsigned int hash;
3765 ls_expr **slot;
3766 struct ls_expr e;
3768 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3769 NULL, /*have_reg_qty=*/false);
3771 e.pattern = x;
3772 slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
3773 if (*slot)
3774 return *slot;
3776 ptr = XNEW (struct ls_expr);
3778 ptr->next = pre_ldst_mems;
3779 ptr->expr = NULL;
3780 ptr->pattern = x;
3781 ptr->pattern_regs = NULL_RTX;
3782 ptr->loads = NULL;
3783 ptr->stores = NULL;
3784 ptr->reaching_reg = NULL_RTX;
3785 ptr->invalid = 0;
3786 ptr->index = 0;
3787 ptr->hash_index = hash;
3788 pre_ldst_mems = ptr;
3789 *slot = ptr;
3791 return ptr;
3794 /* Free up an individual ldst entry. */
3796 static void
3797 free_ldst_entry (struct ls_expr * ptr)
3799 free_INSN_LIST_list (& ptr->loads);
3800 free_INSN_LIST_list (& ptr->stores);
3802 free (ptr);
3805 /* Free up all memory associated with the ldst list. */
3807 static void
3808 free_ld_motion_mems (void)
3810 delete pre_ldst_table;
3811 pre_ldst_table = NULL;
3813 while (pre_ldst_mems)
3815 struct ls_expr * tmp = pre_ldst_mems;
3817 pre_ldst_mems = pre_ldst_mems->next;
3819 free_ldst_entry (tmp);
3822 pre_ldst_mems = NULL;
3825 /* Dump debugging info about the ldst list. */
3827 static void
3828 print_ldst_list (FILE * file)
3830 struct ls_expr * ptr;
3832 fprintf (file, "LDST list: \n");
3834 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3836 fprintf (file, " Pattern (%3d): ", ptr->index);
3838 print_rtl (file, ptr->pattern);
3840 fprintf (file, "\n Loads : ");
3842 if (ptr->loads)
3843 print_rtl (file, ptr->loads);
3844 else
3845 fprintf (file, "(nil)");
3847 fprintf (file, "\n Stores : ");
3849 if (ptr->stores)
3850 print_rtl (file, ptr->stores);
3851 else
3852 fprintf (file, "(nil)");
3854 fprintf (file, "\n\n");
3857 fprintf (file, "\n");
3860 /* Return the entry for X if X is in the list of ldst only expressions, NULL otherwise. */
3862 static struct ls_expr *
3863 find_rtx_in_ldst (rtx x)
3865 struct ls_expr e;
3866 ls_expr **slot;
3867 if (!pre_ldst_table)
3868 return NULL;
3869 e.pattern = x;
3870 slot = pre_ldst_table->find_slot (&e, NO_INSERT);
3871 if (!slot || (*slot)->invalid)
3872 return NULL;
3873 return *slot;
3876 /* Load Motion for loads which only kill themselves. */
3878 /* Return true if x, a MEM, is a simple access with no side effects.
3879 These are the types of loads we consider for the ld_motion list,
3880 otherwise we let the usual aliasing take care of it. */
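/* For illustration: a reference such as (mem:SI (symbol_ref ("i")))
   passes these checks, whereas a volatile access, a BLKmode access, or a
   stack-based argument slot like (mem:SI (plus:SI (reg:SI sp) (const_int 8)))
   is rejected here and left to the normal aliasing machinery.  */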
3882 static int
3883 simple_mem (const_rtx x)
3885 if (MEM_VOLATILE_P (x))
3886 return 0;
3888 if (GET_MODE (x) == BLKmode)
3889 return 0;
3891 /* If we are handling exceptions, we must be careful with memory references
3892 that may trap. If we are not, the behavior is undefined, so we may just
3893 continue. */
3894 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3895 return 0;
3897 if (side_effects_p (x))
3898 return 0;
3900 /* Do not consider function arguments passed on stack. */
3901 if (reg_mentioned_p (stack_pointer_rtx, x))
3902 return 0;
3904 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3905 return 0;
3907 return 1;
3910 /* Make sure there isn't a buried reference in this pattern anywhere.
3911 If there is, invalidate the entry for it since we're not capable
3912 of fixing it up just yet. We have to be sure we know about ALL
3913 loads since the aliasing code will allow each entry in the
3914 ld_motion list to not-alias itself. If we miss a load, we will get
3915 the wrong value since gcse might common it and we won't know to
3916 fix it up. */
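/* For example, in an insn such as

     (set (reg:SI 100) (plus:SI (mem:SI (symbol_ref ("i"))) (const_int 4)))

   the MEM is buried inside the PLUS rather than being the whole SET_SRC,
   so its ldst entry is marked invalid here and trimmed out later
   (register numbers are illustrative).  */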
3918 static void
3919 invalidate_any_buried_refs (rtx x)
3921 const char * fmt;
3922 int i, j;
3923 struct ls_expr * ptr;
3925 /* Invalidate it in the list. */
3926 if (MEM_P (x) && simple_mem (x))
3928 ptr = ldst_entry (x);
3929 ptr->invalid = 1;
3932 /* Recursively process the insn. */
3933 fmt = GET_RTX_FORMAT (GET_CODE (x));
3935 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3937 if (fmt[i] == 'e')
3938 invalidate_any_buried_refs (XEXP (x, i));
3939 else if (fmt[i] == 'E')
3940 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3941 invalidate_any_buried_refs (XVECEXP (x, i, j));
3945 /* Find all the 'simple' MEMs which are used in LOADs and STORES. 'Simple'
3946 here means MEM loads and stores to symbols, with no side effects
3947 and no registers in the expression. For a MEM destination, we also
3948 check that the insn is still valid if we replace the destination with a
3949 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3950 which don't match these criteria, they are invalidated and trimmed out
3951 later. */
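/* Illustrative examples (register numbers made up): a load like

     (set (reg:SI 100) (mem:SI (symbol_ref ("i"))))

   goes on the entry's load list, and a store like

     (set (mem:SI (symbol_ref ("i"))) (reg:SI 101))

   goes on its store list as long as the source could also be copied into
   a plain register without extra clobbers; anything else marks the entry
   invalid so trim_ld_motion_mems removes it.  */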
3953 static void
3954 compute_ld_motion_mems (void)
3956 struct ls_expr * ptr;
3957 basic_block bb;
3958 rtx_insn *insn;
3960 pre_ldst_mems = NULL;
3961 pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);
3963 FOR_EACH_BB_FN (bb, cfun)
3965 FOR_BB_INSNS (bb, insn)
3967 if (NONDEBUG_INSN_P (insn))
3969 if (GET_CODE (PATTERN (insn)) == SET)
3971 rtx src = SET_SRC (PATTERN (insn));
3972 rtx dest = SET_DEST (PATTERN (insn));
3973 rtx note = find_reg_equal_equiv_note (insn);
3974 rtx src_eq;
3976 /* Check for a simple LOAD... */
3977 if (MEM_P (src) && simple_mem (src))
3979 ptr = ldst_entry (src);
3980 if (REG_P (dest))
3981 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3982 else
3983 ptr->invalid = 1;
3985 else
3987 /* Make sure there isn't a buried load somewhere. */
3988 invalidate_any_buried_refs (src);
3991 if (note != 0 && REG_NOTE_KIND (note) == REG_EQUAL)
3992 src_eq = XEXP (note, 0);
3993 else
3994 src_eq = NULL_RTX;
3996 if (src_eq != NULL_RTX
3997 && !(MEM_P (src_eq) && simple_mem (src_eq)))
3998 invalidate_any_buried_refs (src_eq);
4000 /* Check for stores. Don't worry about aliased ones, they
4001 will block any movement we might do later. We only care
4002 about this exact pattern since that is the only
4003 circumstance in which we will ignore the aliasing info. */
4004 if (MEM_P (dest) && simple_mem (dest))
4006 ptr = ldst_entry (dest);
4008 if (! MEM_P (src)
4009 && GET_CODE (src) != ASM_OPERANDS
4010 /* Check for REG manually since want_to_gcse_p
4011 returns 0 for all REGs. */
4012 && can_assign_to_reg_without_clobbers_p (src))
4013 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
4014 else
4015 ptr->invalid = 1;
4018 else
4019 invalidate_any_buried_refs (PATTERN (insn));
4025 /* Remove any references that have been either invalidated or are not in the
4026 expression list for pre gcse. */
4028 static void
4029 trim_ld_motion_mems (void)
4031 struct ls_expr * * last = & pre_ldst_mems;
4032 struct ls_expr * ptr = pre_ldst_mems;
4034 while (ptr != NULL)
4036 struct gcse_expr * expr;
4038 /* Delete if entry has been made invalid. */
4039 if (! ptr->invalid)
4041 /* Delete if we cannot find this mem in the expression list. */
4042 unsigned int hash = ptr->hash_index % expr_hash_table.size;
4044 for (expr = expr_hash_table.table[hash];
4045 expr != NULL;
4046 expr = expr->next_same_hash)
4047 if (expr_equiv_p (expr->expr, ptr->pattern))
4048 break;
4050 else
4051 expr = (struct gcse_expr *) 0;
4053 if (expr)
4055 /* Set the expression field if we are keeping it. */
4056 ptr->expr = expr;
4057 last = & ptr->next;
4058 ptr = ptr->next;
4060 else
4062 *last = ptr->next;
4063 pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
4064 free_ldst_entry (ptr);
4065 ptr = * last;
4069 /* Show the world what we've found. */
4070 if (dump_file && pre_ldst_mems != NULL)
4071 print_ldst_list (dump_file);
4074 /* This routine will take an expression which we are replacing with
4075 a reaching register, and update any stores that are needed if
4076 that expression is in the ld_motion list. Stores are updated by
4077 copying their SRC to the reaching register, and then storing
4078 the reaching register into the store location. This keeps the
4079 correct value in the reaching register for the loads. */
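/* A concrete sketch (register numbers made up): if the reaching register
   is (reg:SI 200), a recorded store

     (set (mem:SI (symbol_ref ("i"))) (reg:SI 101))

   is handled by emitting

     (set (reg:SI 200) (reg:SI 101))

   just before it and rewriting the original insn to

     (set (mem:SI (symbol_ref ("i"))) (reg:SI 200)).  */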
4081 static void
4082 update_ld_motion_stores (struct gcse_expr * expr)
4084 struct ls_expr * mem_ptr;
4086 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
4088 /* We can try to find just the REACHED stores, but it shouldn't
4089 matter to set the reaching reg everywhere... some might be
4090 dead and should be eliminated later. */
4092 /* We replace (set mem expr) with (set reg expr) (set mem reg)
4093 where reg is the reaching reg used in the load. We checked in
4094 compute_ld_motion_mems that we can replace (set mem expr) with
4095 (set reg expr) in that insn. */
4096 rtx list = mem_ptr->stores;
4098 for ( ; list != NULL_RTX; list = XEXP (list, 1))
4100 rtx_insn *insn = as_a <rtx_insn *> (XEXP (list, 0));
4101 rtx pat = PATTERN (insn);
4102 rtx src = SET_SRC (pat);
4103 rtx reg = expr->reaching_reg;
4104 rtx copy;
4106 /* If we've already copied it, continue. */
4107 if (expr->reaching_reg == src)
4108 continue;
4110 if (dump_file)
4112 fprintf (dump_file, "PRE: store updated with reaching reg ");
4113 print_rtl (dump_file, reg);
4114 fprintf (dump_file, ":\n ");
4115 print_inline_rtx (dump_file, insn, 8);
4116 fprintf (dump_file, "\n");
4119 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
4120 emit_insn_before (copy, insn);
4121 SET_SRC (pat) = reg;
4122 df_insn_rescan (insn);
4124 /* un-recognize this pattern since it's probably different now. */
4125 INSN_CODE (insn) = -1;
4126 gcse_create_count++;
4131 /* Return true if the graph is too expensive to optimize. PASS is the
4132 optimization about to be performed. */
4134 static bool
4135 is_too_expensive (const char *pass)
4137 /* Trying to perform global optimizations on flow graphs which have
4138 a high connectivity will take a long time and is unlikely to be
4139 particularly useful.
4141 In normal circumstances a cfg should have about twice as many
4142 edges as blocks. But we do not want to punish small functions
4143 which have a couple of switch statements. Rather than simply
4144 thresholding the number of blocks, use something with a more
4145 graceful degradation. */
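/* For a sense of scale: a function with 1,000 blocks may have up to
   20,000 + 4 * 1,000 = 24,000 edges (24 per block on average) before this
   check fires, while one with 100,000 blocks is allowed 420,000 edges
   (about 4.2 per block), so the permitted edge-to-block ratio tightens
   gradually as functions grow.  */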
4146 if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
4148 warning (OPT_Wdisabled_optimization,
4149 "%s: %d basic blocks and %d edges/basic block",
4150 pass, n_basic_blocks_for_fn (cfun),
4151 n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
4153 return true;
4156 /* If allocating memory for the dataflow bitmaps would take up too much
4157 storage it's better just to disable the optimization. */
4158 if ((n_basic_blocks_for_fn (cfun)
4159 * SBITMAP_SET_SIZE (max_reg_num ())
4160 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
4162 warning (OPT_Wdisabled_optimization,
4163 "%s: %d basic blocks and %d registers",
4164 pass, n_basic_blocks_for_fn (cfun), max_reg_num ());
4166 return true;
4169 return false;
4172 static unsigned int
4173 execute_rtl_pre (void)
4175 int changed;
4176 delete_unreachable_blocks ();
4177 df_analyze ();
4178 changed = one_pre_gcse_pass ();
4179 flag_rerun_cse_after_global_opts |= changed;
4180 if (changed)
4181 cleanup_cfg (0);
4182 return 0;
4185 static unsigned int
4186 execute_rtl_hoist (void)
4188 int changed;
4189 delete_unreachable_blocks ();
4190 df_analyze ();
4191 changed = one_code_hoisting_pass ();
4192 flag_rerun_cse_after_global_opts |= changed;
4193 if (changed)
4194 cleanup_cfg (0);
4195 return 0;
4198 namespace {
4200 const pass_data pass_data_rtl_pre =
4202 RTL_PASS, /* type */
4203 "rtl pre", /* name */
4204 OPTGROUP_NONE, /* optinfo_flags */
4205 TV_PRE, /* tv_id */
4206 PROP_cfglayout, /* properties_required */
4207 0, /* properties_provided */
4208 0, /* properties_destroyed */
4209 0, /* todo_flags_start */
4210 TODO_df_finish, /* todo_flags_finish */
4213 class pass_rtl_pre : public rtl_opt_pass
4215 public:
4216 pass_rtl_pre (gcc::context *ctxt)
4217 : rtl_opt_pass (pass_data_rtl_pre, ctxt)
4220 /* opt_pass methods: */
4221 virtual bool gate (function *);
4222 virtual unsigned int execute (function *) { return execute_rtl_pre (); }
4224 }; // class pass_rtl_pre
4226 /* We do not construct an accurate cfg in functions which call
4227 setjmp, so none of these passes runs if the function calls
4228 setjmp.
4229 FIXME: Should just handle setjmp via REG_SETJMP notes. */
4231 bool
4232 pass_rtl_pre::gate (function *fun)
4234 return optimize > 0 && flag_gcse
4235 && !fun->calls_setjmp
4236 && optimize_function_for_speed_p (fun)
4237 && dbg_cnt (pre);
4240 } // anon namespace
4242 rtl_opt_pass *
4243 make_pass_rtl_pre (gcc::context *ctxt)
4245 return new pass_rtl_pre (ctxt);
4248 namespace {
4250 const pass_data pass_data_rtl_hoist =
4252 RTL_PASS, /* type */
4253 "hoist", /* name */
4254 OPTGROUP_NONE, /* optinfo_flags */
4255 TV_HOIST, /* tv_id */
4256 PROP_cfglayout, /* properties_required */
4257 0, /* properties_provided */
4258 0, /* properties_destroyed */
4259 0, /* todo_flags_start */
4260 TODO_df_finish, /* todo_flags_finish */
4263 class pass_rtl_hoist : public rtl_opt_pass
4265 public:
4266 pass_rtl_hoist (gcc::context *ctxt)
4267 : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
4270 /* opt_pass methods: */
4271 virtual bool gate (function *);
4272 virtual unsigned int execute (function *) { return execute_rtl_hoist (); }
4274 }; // class pass_rtl_hoist
4276 bool
4277 pass_rtl_hoist::gate (function *)
4279 return optimize > 0 && flag_gcse
4280 && !cfun->calls_setjmp
4281 /* It does not make sense to run code hoisting unless we are optimizing
4282 for code size -- it rarely makes programs faster, and can make them
4283 bigger if we did PRE (when optimizing for space, we don't run PRE). */
4284 && optimize_function_for_size_p (cfun)
4285 && dbg_cnt (hoist);
4288 } // anon namespace
4290 rtl_opt_pass *
4291 make_pass_rtl_hoist (gcc::context *ctxt)
4293 return new pass_rtl_hoist (ctxt);
4296 /* Reset all state within gcse.c so that we can rerun the compiler
4297 within the same process. For use by toplev::finalize. */
4299 void
4300 gcse_c_finalize (void)
4302 test_insn = NULL;
4305 #include "gt-gcse.h"