1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
28 /* References searched while implementing this.
30 Compilers Principles, Techniques and Tools
34 Global Optimization by Suppression of Partial Redundancies
36 communications of the acm, Vol. 22, Num. 2, Feb. 1979
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
40 Stanford Ph.D. thesis, Dec. 1983
42 A Fast Algorithm for Code Movement Optimization
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
56 Efficiently Computing Static Single Assignment Form and the Control
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
100 Global code motion / global value numbering
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
104 Value Driven Redundancy Elimination
106 Rice University Ph.D. thesis, Apr. 1996
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
112 High Performance Compilers for Parallel Computing
116 Advanced Compiler Design and Implementation
118 Morgan Kaufmann, 1997
120 Building an Optimizing Compiler
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
139 #include "coretypes.h"
141 #include "diagnostic-core.h"
148 #include "hard-reg-set.h"
150 #include "insn-config.h"
152 #include "basic-block.h"
154 #include "function.h"
163 #include "tree-pass.h"
171 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
172 are a superset of those done by classic GCSE.
174 Two passes of copy/constant propagation are done around PRE or hoisting
175 because the first one enables more GCSE and the second one helps to clean
176 up the copies that PRE and HOIST create. This is needed more for PRE than
177 for HOIST because code hoisting will try to use an existing register
178 containing the common subexpression rather than create a new one. This is
179 harder to do for PRE because of the code motion (which HOIST doesn't do).
181 Expressions we are interested in GCSE-ing are of the form
182 (set (pseudo-reg) (expression)).
183 Function want_to_gcse_p says what these are.
185 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
186 This allows PRE to hoist expressions that are expressed in multiple insns,
187 such as complex address calculations (e.g. for PIC code, or loads with a
188 high part and a low part).
190 PRE handles moving invariant expressions out of loops (by treating them as
191 partially redundant).
193 **********************
195 We used to support multiple passes but there are diminishing returns in
196 doing so. The first pass usually makes 90% of the changes that are doable.
197 A second pass can make a few more changes made possible by the first pass.
198 Experiments show any further passes don't make enough changes to justify
201 A study of spec92 using an unlimited number of passes:
202 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
203 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
204 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
206 It was found doing copy propagation between each pass enables further
209 This study was done before expressions in REG_EQUAL notes were added as
210 candidate expressions for optimization, and before the GIMPLE optimizers
211 were added. Probably, multiple passes is even less efficient now than
212 at the time when the study was conducted.
214 PRE is quite expensive in complicated functions because the DFA can take
215 a while to converge. Hence we only perform one pass.
217 **********************
219 The steps for PRE are:
221 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
223 2) Perform the data flow analysis for PRE.
225 3) Delete the redundant instructions
227 4) Insert the required copies [if any] that make the partially
228 redundant instructions fully redundant.
230 5) For other reaching expressions, insert an instruction to copy the value
231 to a newly created pseudo that will reach the redundant instruction.
233 The deletion is done first so that when we do insertions we
234 know which pseudo reg to use.
236 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
237 argue it is not. The number of iterations for the algorithm to converge
238 is typically 2-4 so I don't view it as that expensive (relatively speaking).
240 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
241 we create. To make an expression reach the place where it's redundant,
242 the result of the expression is copied to a new register, and the redundant
243 expression is deleted by replacing it with this new register. Classic GCSE
244 doesn't have this problem as much as it computes the reaching defs of
245 each register in each block and thus can try to use an existing
248 /* GCSE global vars. */
250 struct target_gcse default_target_gcse
;
251 #if SWITCHABLE_TARGET
252 struct target_gcse
*this_target_gcse
= &default_target_gcse
;
255 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
256 int flag_rerun_cse_after_global_opts
;
258 /* An obstack for our working variables. */
259 static struct obstack gcse_obstack
;
261 struct reg_use
{rtx reg_rtx
; };
263 /* Hash table of expressions. */
267 /* The expression. */
269 /* Index in the available expression bitmaps. */
271 /* Next entry with the same hash. */
272 struct expr
*next_same_hash
;
273 /* List of anticipatable occurrences in basic blocks in the function.
274 An "anticipatable occurrence" is one that is the first occurrence in the
275 basic block, the operands are not modified in the basic block prior
276 to the occurrence and the output is not used between the start of
277 the block and the occurrence. */
278 struct occr
*antic_occr
;
279 /* List of available occurrence in basic blocks in the function.
280 An "available occurrence" is one that is the last occurrence in the
281 basic block and the operands are not modified by following statements in
282 the basic block [including this insn]. */
283 struct occr
*avail_occr
;
284 /* Non-null if the computation is PRE redundant.
285 The value is the newly created pseudo-reg to record a copy of the
286 expression in all the places that reach the redundant copy. */
288 /* Maximum distance in instructions this expression can travel.
289 We avoid moving simple expressions for more than a few instructions
290 to keep register pressure under control.
291 A value of "0" removes restrictions on how far the expression can
296 /* Occurrence of an expression.
297 There is one per basic block. If a pattern appears more than once the
298 last appearance is used [or first for anticipatable expressions]. */
302 /* Next occurrence of this expression. */
304 /* The insn that computes the expression. */
306 /* Nonzero if this [anticipatable] occurrence has been deleted. */
308 /* Nonzero if this [available] occurrence has been copied to
310 /* ??? This is mutually exclusive with deleted_p, so they could share
315 typedef struct occr
*occr_t
;
317 DEF_VEC_ALLOC_P (occr_t
, heap
);
319 /* Expression hash tables.
320 Each hash table is an array of buckets.
321 ??? It is known that if it were an array of entries, structure elements
322 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
323 not clear whether in the final analysis a sufficient amount of memory would
324 be saved as the size of the available expression bitmaps would be larger
325 [one could build a mapping table without holes afterwards though].
326 Someday I'll perform the computation and figure it out. */
331 This is an array of `expr_hash_table_size' elements. */
334 /* Size of the hash table, in elements. */
337 /* Number of hash table elements. */
338 unsigned int n_elems
;
341 /* Expression hash table. */
342 static struct hash_table_d expr_hash_table
;
344 /* This is a list of expressions which are MEMs and will be used by load
346 Load motion tracks MEMs which aren't killed by anything except itself,
347 i.e. loads and stores to a single location.
348 We can then allow movement of these MEM refs with a little special
349 allowance. (all stores copy the same value to the reaching reg used
350 for the loads). This means all values used to store into memory must have
351 no side effects so we can re-issue the setter value. */
355 struct expr
* expr
; /* Gcse expression reference for LM. */
356 rtx pattern
; /* Pattern of this mem. */
357 rtx pattern_regs
; /* List of registers mentioned by the mem. */
358 rtx loads
; /* INSN list of loads seen. */
359 rtx stores
; /* INSN list of stores seen. */
360 struct ls_expr
* next
; /* Next in the list. */
361 int invalid
; /* Invalid for some reason. */
362 int index
; /* If it maps to a bitmap index. */
363 unsigned int hash_index
; /* Index when in a hash table. */
364 rtx reaching_reg
; /* Register to use when re-writing. */
367 /* Head of the list of load/store memory refs. */
368 static struct ls_expr
* pre_ldst_mems
= NULL
;
370 /* Hashtable for the load/store memory refs. */
371 static htab_t pre_ldst_table
= NULL
;
373 /* Bitmap containing one bit for each register in the program.
374 Used when performing GCSE to track which registers have been set since
375 the start of the basic block. */
376 static regset reg_set_bitmap
;
378 /* Array, indexed by basic block number for a list of insns which modify
379 memory within that block. */
380 static VEC (rtx
,heap
) **modify_mem_list
;
381 static bitmap modify_mem_list_set
;
383 typedef struct modify_pair_s
385 rtx dest
; /* A MEM. */
386 rtx dest_addr
; /* The canonical address of `dest'. */
389 DEF_VEC_O(modify_pair
);
390 DEF_VEC_ALLOC_O(modify_pair
,heap
);
392 /* This array parallels modify_mem_list, except that it stores MEMs
393 being set and their canonicalized memory addresses. */
394 static VEC (modify_pair
,heap
) **canon_modify_mem_list
;
396 /* Bitmap indexed by block numbers to record which blocks contain
398 static bitmap blocks_with_calls
;
400 /* Various variables for statistics gathering. */
402 /* Memory used in a pass.
403 This isn't intended to be absolutely precise. Its intent is only
404 to keep an eye on memory usage. */
405 static int bytes_used
;
407 /* GCSE substitutions made. */
408 static int gcse_subst_count
;
409 /* Number of copy instructions created. */
410 static int gcse_create_count
;
412 /* Doing code hoisting. */
413 static bool doing_code_hoisting_p
= false;
415 /* For available exprs */
416 static sbitmap
*ae_kill
;
418 static void compute_can_copy (void);
419 static void *gmalloc (size_t) ATTRIBUTE_MALLOC
;
420 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC
;
421 static void *gcse_alloc (unsigned long);
422 static void alloc_gcse_mem (void);
423 static void free_gcse_mem (void);
424 static void hash_scan_insn (rtx
, struct hash_table_d
*);
425 static void hash_scan_set (rtx
, rtx
, struct hash_table_d
*);
426 static void hash_scan_clobber (rtx
, rtx
, struct hash_table_d
*);
427 static void hash_scan_call (rtx
, rtx
, struct hash_table_d
*);
428 static int want_to_gcse_p (rtx
, int *);
429 static int oprs_unchanged_p (const_rtx
, const_rtx
, int);
430 static int oprs_anticipatable_p (const_rtx
, const_rtx
);
431 static int oprs_available_p (const_rtx
, const_rtx
);
432 static void insert_expr_in_table (rtx
, enum machine_mode
, rtx
, int, int, int,
433 struct hash_table_d
*);
434 static unsigned int hash_expr (const_rtx
, enum machine_mode
, int *, int);
435 static int expr_equiv_p (const_rtx
, const_rtx
);
436 static void record_last_reg_set_info (rtx
, int);
437 static void record_last_mem_set_info (rtx
);
438 static void record_last_set_info (rtx
, const_rtx
, void *);
439 static void compute_hash_table (struct hash_table_d
*);
440 static void alloc_hash_table (struct hash_table_d
*);
441 static void free_hash_table (struct hash_table_d
*);
442 static void compute_hash_table_work (struct hash_table_d
*);
443 static void dump_hash_table (FILE *, const char *, struct hash_table_d
*);
444 static void compute_transp (const_rtx
, int, sbitmap
*);
445 static void compute_local_properties (sbitmap
*, sbitmap
*, sbitmap
*,
446 struct hash_table_d
*);
447 static void mems_conflict_for_gcse_p (rtx
, const_rtx
, void *);
448 static int load_killed_in_block_p (const_basic_block
, int, const_rtx
, int);
449 static void canon_list_insert (rtx
, const_rtx
, void *);
450 static void alloc_pre_mem (int, int);
451 static void free_pre_mem (void);
452 static struct edge_list
*compute_pre_data (void);
453 static int pre_expr_reaches_here_p (basic_block
, struct expr
*,
455 static void insert_insn_end_basic_block (struct expr
*, basic_block
);
456 static void pre_insert_copy_insn (struct expr
*, rtx
);
457 static void pre_insert_copies (void);
458 static int pre_delete (void);
459 static int pre_gcse (struct edge_list
*);
460 static int one_pre_gcse_pass (void);
461 static void add_label_notes (rtx
, rtx
);
462 static void alloc_code_hoist_mem (int, int);
463 static void free_code_hoist_mem (void);
464 static void compute_code_hoist_vbeinout (void);
465 static void compute_code_hoist_data (void);
466 static int hoist_expr_reaches_here_p (basic_block
, int, basic_block
, char *,
468 static int hoist_code (void);
469 static int one_code_hoisting_pass (void);
470 static rtx
process_insert_insn (struct expr
*);
471 static int pre_edge_insert (struct edge_list
*, struct expr
**);
472 static int pre_expr_reaches_here_p_work (basic_block
, struct expr
*,
473 basic_block
, char *);
474 static struct ls_expr
* ldst_entry (rtx
);
475 static void free_ldst_entry (struct ls_expr
*);
476 static void free_ld_motion_mems (void);
477 static void print_ldst_list (FILE *);
478 static struct ls_expr
* find_rtx_in_ldst (rtx
);
479 static int simple_mem (const_rtx
);
480 static void invalidate_any_buried_refs (rtx
);
481 static void compute_ld_motion_mems (void);
482 static void trim_ld_motion_mems (void);
483 static void update_ld_motion_stores (struct expr
*);
484 static void clear_modify_mem_tables (void);
485 static void free_modify_mem_tables (void);
486 static rtx
gcse_emit_move_after (rtx
, rtx
, rtx
);
487 static bool is_too_expensive (const char *);
488 static unsigned find_loop_lsm_limit (struct loop
*);
489 static void adjust_loop_lsm_limit (struct loop
*, unsigned);
490 static void init_loop_lsm_limit (void);
491 static void fini_loop_lsm_limit (void);
493 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
494 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
496 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
497 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
499 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
500 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
502 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
503 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
505 /* Misc. utilities. */
508 (this_target_gcse->x_can_copy)
509 #define can_copy_init_p \
510 (this_target_gcse->x_can_copy_init_p)
512 /* Compute which modes support reg/reg copy operations. */
515 compute_can_copy (void)
518 #ifndef AVOID_CCMODE_COPIES
521 memset (can_copy
, 0, NUM_MACHINE_MODES
);
524 for (i
= 0; i
< NUM_MACHINE_MODES
; i
++)
525 if (GET_MODE_CLASS (i
) == MODE_CC
)
527 #ifdef AVOID_CCMODE_COPIES
530 reg
= gen_rtx_REG ((enum machine_mode
) i
, LAST_VIRTUAL_REGISTER
+ 1);
531 insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, reg
));
532 if (recog (PATTERN (insn
), insn
, NULL
) >= 0)
542 /* Returns whether the mode supports reg/reg copy operations. */
545 can_copy_p (enum machine_mode mode
)
547 if (! can_copy_init_p
)
550 can_copy_init_p
= true;
553 return can_copy
[mode
] != 0;
556 /* Cover function to xmalloc to record bytes allocated. */
559 gmalloc (size_t size
)
562 return xmalloc (size
);
565 /* Cover function to xcalloc to record bytes allocated. */
568 gcalloc (size_t nelem
, size_t elsize
)
570 bytes_used
+= nelem
* elsize
;
571 return xcalloc (nelem
, elsize
);
574 /* Cover function to obstack_alloc. */
577 gcse_alloc (unsigned long size
)
580 return obstack_alloc (&gcse_obstack
, size
);
583 /* Allocate memory for the reg/memory set tracking tables.
584 This is called at the start of each pass. */
587 alloc_gcse_mem (void)
589 /* Allocate vars to track sets of regs. */
590 reg_set_bitmap
= ALLOC_REG_SET (NULL
);
592 /* Allocate array to keep a list of insns which modify memory in each
594 modify_mem_list
= GCNEWVEC (VEC (rtx
,heap
) *, last_basic_block
);
595 canon_modify_mem_list
= GCNEWVEC (VEC (modify_pair
,heap
) *,
597 modify_mem_list_set
= BITMAP_ALLOC (NULL
);
598 blocks_with_calls
= BITMAP_ALLOC (NULL
);
601 /* Free memory allocated by alloc_gcse_mem. */
606 FREE_REG_SET (reg_set_bitmap
);
608 free_modify_mem_tables ();
609 BITMAP_FREE (modify_mem_list_set
);
610 BITMAP_FREE (blocks_with_calls
);
613 /* Compute the local properties of each recorded expression.
615 Local properties are those that are defined by the block, irrespective of
618 An expression is transparent in a block if its operands are not modified
621 An expression is computed (locally available) in a block if it is computed
622 at least once and expression would contain the same value if the
623 computation was moved to the end of the block.
625 An expression is locally anticipatable in a block if it is computed at
626 least once and expression would contain the same value if the computation
627 was moved to the beginning of the block.
629 We call this routine for pre and code hoisting. They all compute
630 basically the same information and thus can easily share this code.
632 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
633 properties. If NULL, then it is not necessary to compute or record that
636 TABLE controls which hash table to look at. */
639 compute_local_properties (sbitmap
*transp
, sbitmap
*comp
, sbitmap
*antloc
,
640 struct hash_table_d
*table
)
644 /* Initialize any bitmaps that were passed in. */
647 sbitmap_vector_ones (transp
, last_basic_block
);
651 sbitmap_vector_zero (comp
, last_basic_block
);
653 sbitmap_vector_zero (antloc
, last_basic_block
);
655 for (i
= 0; i
< table
->size
; i
++)
659 for (expr
= table
->table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
661 int indx
= expr
->bitmap_index
;
664 /* The expression is transparent in this block if it is not killed.
665 We start by assuming all are transparent [none are killed], and
666 then reset the bits for those that are. */
668 compute_transp (expr
->expr
, indx
, transp
);
670 /* The occurrences recorded in antic_occr are exactly those that
671 we want to set to nonzero in ANTLOC. */
673 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
675 SET_BIT (antloc
[BLOCK_FOR_INSN (occr
->insn
)->index
], indx
);
677 /* While we're scanning the table, this is a good place to
682 /* The occurrences recorded in avail_occr are exactly those that
683 we want to set to nonzero in COMP. */
685 for (occr
= expr
->avail_occr
; occr
!= NULL
; occr
= occr
->next
)
687 SET_BIT (comp
[BLOCK_FOR_INSN (occr
->insn
)->index
], indx
);
689 /* While we're scanning the table, this is a good place to
694 /* While we're scanning the table, this is a good place to
696 expr
->reaching_reg
= 0;
701 /* Hash table support. */
703 struct reg_avail_info
710 static struct reg_avail_info
*reg_avail_info
;
711 static basic_block current_bb
;
713 /* See whether X, the source of a set, is something we want to consider for
717 want_to_gcse_p (rtx x
, int *max_distance_ptr
)
720 /* On register stack architectures, don't GCSE constants from the
721 constant pool, as the benefits are often swamped by the overhead
722 of shuffling the register stack between basic blocks. */
723 if (IS_STACK_MODE (GET_MODE (x
)))
724 x
= avoid_constant_pool_reference (x
);
727 /* GCSE'ing constants:
729 We do not specifically distinguish between constant and non-constant
730 expressions in PRE and Hoist. We use set_src_cost below to limit
731 the maximum distance simple expressions can travel.
733 Nevertheless, constants are much easier to GCSE, and, hence,
734 it is easy to overdo the optimizations. Usually, excessive PRE and
735 Hoisting of constant leads to increased register pressure.
737 RA can deal with this by rematerialing some of the constants.
738 Therefore, it is important that the back-end generates sets of constants
739 in a way that allows reload rematerialize them under high register
740 pressure, i.e., a pseudo register with REG_EQUAL to constant
741 is set only once. Failing to do so will result in IRA/reload
742 spilling such constants under high register pressure instead of
743 rematerializing them. */
745 switch (GET_CODE (x
))
756 if (!doing_code_hoisting_p
)
757 /* Do not PRE constants. */
763 if (doing_code_hoisting_p
)
764 /* PRE doesn't implement max_distance restriction. */
769 gcc_assert (!optimize_function_for_speed_p (cfun
)
770 && optimize_function_for_size_p (cfun
));
771 cost
= set_src_cost (x
, 0);
773 if (cost
< COSTS_N_INSNS (GCSE_UNRESTRICTED_COST
))
775 max_distance
= (GCSE_COST_DISTANCE_RATIO
* cost
) / 10;
776 if (max_distance
== 0)
779 gcc_assert (max_distance
> 0);
784 if (max_distance_ptr
)
785 *max_distance_ptr
= max_distance
;
788 return can_assign_to_reg_without_clobbers_p (x
);
792 /* Used internally by can_assign_to_reg_without_clobbers_p. */
794 static GTY(()) rtx test_insn
;
796 /* Return true if we can assign X to a pseudo register such that the
797 resulting insn does not result in clobbering a hard register as a
800 Additionally, if the target requires it, check that the resulting insn
801 can be copied. If it cannot, this means that X is special and probably
802 has hidden side-effects we don't want to mess with.
804 This function is typically used by code motion passes, to verify
805 that it is safe to insert an insn without worrying about clobbering
806 maybe live hard regs. */
809 can_assign_to_reg_without_clobbers_p (rtx x
)
811 int num_clobbers
= 0;
814 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
815 if (general_operand (x
, GET_MODE (x
)))
817 else if (GET_MODE (x
) == VOIDmode
)
820 /* Otherwise, check if we can make a valid insn from it. First initialize
821 our test insn if we haven't already. */
825 = make_insn_raw (gen_rtx_SET (VOIDmode
,
826 gen_rtx_REG (word_mode
,
827 FIRST_PSEUDO_REGISTER
* 2),
829 NEXT_INSN (test_insn
) = PREV_INSN (test_insn
) = 0;
832 /* Now make an insn like the one we would make when GCSE'ing and see if
834 PUT_MODE (SET_DEST (PATTERN (test_insn
)), GET_MODE (x
));
835 SET_SRC (PATTERN (test_insn
)) = x
;
837 icode
= recog (PATTERN (test_insn
), test_insn
, &num_clobbers
);
841 if (num_clobbers
> 0 && added_clobbers_hard_reg_p (icode
))
844 if (targetm
.cannot_copy_insn_p
&& targetm
.cannot_copy_insn_p (test_insn
))
850 /* Return nonzero if the operands of expression X are unchanged from the
851 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
852 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
855 oprs_unchanged_p (const_rtx x
, const_rtx insn
, int avail_p
)
869 struct reg_avail_info
*info
= ®_avail_info
[REGNO (x
)];
871 if (info
->last_bb
!= current_bb
)
874 return info
->last_set
< DF_INSN_LUID (insn
);
876 return info
->first_set
>= DF_INSN_LUID (insn
);
880 if (load_killed_in_block_p (current_bb
, DF_INSN_LUID (insn
),
884 return oprs_unchanged_p (XEXP (x
, 0), insn
, avail_p
);
911 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
915 /* If we are about to do the last recursive call needed at this
916 level, change it into iteration. This function is called enough
919 return oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
);
921 else if (! oprs_unchanged_p (XEXP (x
, i
), insn
, avail_p
))
924 else if (fmt
[i
] == 'E')
925 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
926 if (! oprs_unchanged_p (XVECEXP (x
, i
, j
), insn
, avail_p
))
933 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
935 struct mem_conflict_info
937 /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
938 see if a memory store conflicts with this memory load. */
941 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
946 /* DEST is the output of an instruction. If it is a memory reference and
947 possibly conflicts with the load found in DATA, then communicate this
948 information back through DATA. */
951 mems_conflict_for_gcse_p (rtx dest
, const_rtx setter ATTRIBUTE_UNUSED
,
954 struct mem_conflict_info
*mci
= (struct mem_conflict_info
*) data
;
956 while (GET_CODE (dest
) == SUBREG
957 || GET_CODE (dest
) == ZERO_EXTRACT
958 || GET_CODE (dest
) == STRICT_LOW_PART
)
959 dest
= XEXP (dest
, 0);
961 /* If DEST is not a MEM, then it will not conflict with the load. Note
962 that function calls are assumed to clobber memory, but are handled
967 /* If we are setting a MEM in our list of specially recognized MEMs,
968 don't mark as killed this time. */
969 if (pre_ldst_mems
!= NULL
&& expr_equiv_p (dest
, mci
->mem
))
971 if (!find_rtx_in_ldst (dest
))
972 mci
->conflict
= true;
976 if (true_dependence (dest
, GET_MODE (dest
), mci
->mem
))
977 mci
->conflict
= true;
980 /* Return nonzero if the expression in X (a memory reference) is killed
981 in block BB before or after the insn with the LUID in UID_LIMIT.
982 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
985 To check the entire block, set UID_LIMIT to max_uid + 1 and
989 load_killed_in_block_p (const_basic_block bb
, int uid_limit
, const_rtx x
,
992 VEC (rtx
,heap
) *list
= modify_mem_list
[bb
->index
];
996 /* If this is a readonly then we aren't going to be changing it. */
997 if (MEM_READONLY_P (x
))
1000 FOR_EACH_VEC_ELT_REVERSE (rtx
, list
, ix
, setter
)
1002 struct mem_conflict_info mci
;
1004 /* Ignore entries in the list that do not apply. */
1006 && DF_INSN_LUID (setter
) < uid_limit
)
1008 && DF_INSN_LUID (setter
) > uid_limit
))
1011 /* If SETTER is a call everything is clobbered. Note that calls
1012 to pure functions are never put on the list, so we need not
1013 worry about them. */
1014 if (CALL_P (setter
))
1017 /* SETTER must be an INSN of some kind that sets memory. Call
1018 note_stores to examine each hunk of memory that is modified. */
1020 mci
.conflict
= false;
1021 note_stores (PATTERN (setter
), mems_conflict_for_gcse_p
, &mci
);
1028 /* Return nonzero if the operands of expression X are unchanged from
1029 the start of INSN's basic block up to but not including INSN. */
1032 oprs_anticipatable_p (const_rtx x
, const_rtx insn
)
1034 return oprs_unchanged_p (x
, insn
, 0);
1037 /* Return nonzero if the operands of expression X are unchanged from
1038 INSN to the end of INSN's basic block. */
1041 oprs_available_p (const_rtx x
, const_rtx insn
)
1043 return oprs_unchanged_p (x
, insn
, 1);
1046 /* Hash expression X.
1048 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1049 indicating if a volatile operand is found or if the expression contains
1050 something we don't want to insert in the table. HASH_TABLE_SIZE is
1051 the current size of the hash table to be probed. */
1054 hash_expr (const_rtx x
, enum machine_mode mode
, int *do_not_record_p
,
1055 int hash_table_size
)
1059 *do_not_record_p
= 0;
1061 hash
= hash_rtx (x
, mode
, do_not_record_p
, NULL
, /*have_reg_qty=*/false);
1062 return hash
% hash_table_size
;
1065 /* Return nonzero if exp1 is equivalent to exp2. */
1068 expr_equiv_p (const_rtx x
, const_rtx y
)
1070 return exp_equiv_p (x
, y
, 0, true);
1073 /* Insert expression X in INSN in the hash TABLE.
1074 If it is already present, record it as the last occurrence in INSN's
1077 MODE is the mode of the value X is being stored into.
1078 It is only used if X is a CONST_INT.
1080 ANTIC_P is nonzero if X is an anticipatable expression.
1081 AVAIL_P is nonzero if X is an available expression.
1083 MAX_DISTANCE is the maximum distance in instructions this expression can
1087 insert_expr_in_table (rtx x
, enum machine_mode mode
, rtx insn
, int antic_p
,
1088 int avail_p
, int max_distance
, struct hash_table_d
*table
)
1090 int found
, do_not_record_p
;
1092 struct expr
*cur_expr
, *last_expr
= NULL
;
1093 struct occr
*antic_occr
, *avail_occr
;
1095 hash
= hash_expr (x
, mode
, &do_not_record_p
, table
->size
);
1097 /* Do not insert expression in table if it contains volatile operands,
1098 or if hash_expr determines the expression is something we don't want
1099 to or can't handle. */
1100 if (do_not_record_p
)
1103 cur_expr
= table
->table
[hash
];
1106 while (cur_expr
&& 0 == (found
= expr_equiv_p (cur_expr
->expr
, x
)))
1108 /* If the expression isn't found, save a pointer to the end of
1110 last_expr
= cur_expr
;
1111 cur_expr
= cur_expr
->next_same_hash
;
1116 cur_expr
= GOBNEW (struct expr
);
1117 bytes_used
+= sizeof (struct expr
);
1118 if (table
->table
[hash
] == NULL
)
1119 /* This is the first pattern that hashed to this index. */
1120 table
->table
[hash
] = cur_expr
;
1122 /* Add EXPR to end of this hash chain. */
1123 last_expr
->next_same_hash
= cur_expr
;
1125 /* Set the fields of the expr element. */
1127 cur_expr
->bitmap_index
= table
->n_elems
++;
1128 cur_expr
->next_same_hash
= NULL
;
1129 cur_expr
->antic_occr
= NULL
;
1130 cur_expr
->avail_occr
= NULL
;
1131 gcc_assert (max_distance
>= 0);
1132 cur_expr
->max_distance
= max_distance
;
1135 gcc_assert (cur_expr
->max_distance
== max_distance
);
1137 /* Now record the occurrence(s). */
1140 antic_occr
= cur_expr
->antic_occr
;
1143 && BLOCK_FOR_INSN (antic_occr
->insn
) != BLOCK_FOR_INSN (insn
))
1147 /* Found another instance of the expression in the same basic block.
1148 Prefer the currently recorded one. We want the first one in the
1149 block and the block is scanned from start to end. */
1150 ; /* nothing to do */
1153 /* First occurrence of this expression in this basic block. */
1154 antic_occr
= GOBNEW (struct occr
);
1155 bytes_used
+= sizeof (struct occr
);
1156 antic_occr
->insn
= insn
;
1157 antic_occr
->next
= cur_expr
->antic_occr
;
1158 antic_occr
->deleted_p
= 0;
1159 cur_expr
->antic_occr
= antic_occr
;
1165 avail_occr
= cur_expr
->avail_occr
;
1168 && BLOCK_FOR_INSN (avail_occr
->insn
) == BLOCK_FOR_INSN (insn
))
1170 /* Found another instance of the expression in the same basic block.
1171 Prefer this occurrence to the currently recorded one. We want
1172 the last one in the block and the block is scanned from start
1174 avail_occr
->insn
= insn
;
1178 /* First occurrence of this expression in this basic block. */
1179 avail_occr
= GOBNEW (struct occr
);
1180 bytes_used
+= sizeof (struct occr
);
1181 avail_occr
->insn
= insn
;
1182 avail_occr
->next
= cur_expr
->avail_occr
;
1183 avail_occr
->deleted_p
= 0;
1184 cur_expr
->avail_occr
= avail_occr
;
1189 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1192 hash_scan_set (rtx set
, rtx insn
, struct hash_table_d
*table
)
1194 rtx src
= SET_SRC (set
);
1195 rtx dest
= SET_DEST (set
);
1198 if (GET_CODE (src
) == CALL
)
1199 hash_scan_call (src
, insn
, table
);
1201 else if (REG_P (dest
))
1203 unsigned int regno
= REGNO (dest
);
1204 int max_distance
= 0;
1206 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1208 This allows us to do a single GCSE pass and still eliminate
1209 redundant constants, addresses or other expressions that are
1210 constructed with multiple instructions.
1212 However, keep the original SRC if INSN is a simple reg-reg move.
1213 In this case, there will almost always be a REG_EQUAL note on the
1214 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1215 for INSN, we miss copy propagation opportunities and we perform the
1216 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1217 do more than one PRE GCSE pass.
1219 Note that this does not impede profitable constant propagations. We
1220 "look through" reg-reg sets in lookup_avail_set. */
1221 note
= find_reg_equal_equiv_note (insn
);
1223 && REG_NOTE_KIND (note
) == REG_EQUAL
1225 && want_to_gcse_p (XEXP (note
, 0), NULL
))
1226 src
= XEXP (note
, 0), set
= gen_rtx_SET (VOIDmode
, dest
, src
);
1228 /* Only record sets of pseudo-regs in the hash table. */
1229 if (regno
>= FIRST_PSEUDO_REGISTER
1230 /* Don't GCSE something if we can't do a reg/reg copy. */
1231 && can_copy_p (GET_MODE (dest
))
1232 /* GCSE commonly inserts instruction after the insn. We can't
1233 do that easily for EH edges so disable GCSE on these for now. */
1234 /* ??? We can now easily create new EH landing pads at the
1235 gimple level, for splitting edges; there's no reason we
1236 can't do the same thing at the rtl level. */
1237 && !can_throw_internal (insn
)
1238 /* Is SET_SRC something we want to gcse? */
1239 && want_to_gcse_p (src
, &max_distance
)
1240 /* Don't CSE a nop. */
1241 && ! set_noop_p (set
)
1242 /* Don't GCSE if it has attached REG_EQUIV note.
1243 At this point this only function parameters should have
1244 REG_EQUIV notes and if the argument slot is used somewhere
1245 explicitly, it means address of parameter has been taken,
1246 so we should not extend the lifetime of the pseudo. */
1247 && (note
== NULL_RTX
|| ! MEM_P (XEXP (note
, 0))))
1249 /* An expression is not anticipatable if its operands are
1250 modified before this insn or if this is not the only SET in
1251 this insn. The latter condition does not have to mean that
1252 SRC itself is not anticipatable, but we just will not be
1253 able to handle code motion of insns with multiple sets. */
1254 int antic_p
= oprs_anticipatable_p (src
, insn
)
1255 && !multiple_sets (insn
);
1256 /* An expression is not available if its operands are
1257 subsequently modified, including this insn. It's also not
1258 available if this is a branch, because we can't insert
1259 a set after the branch. */
1260 int avail_p
= (oprs_available_p (src
, insn
)
1261 && ! JUMP_P (insn
));
1263 insert_expr_in_table (src
, GET_MODE (dest
), insn
, antic_p
, avail_p
,
1264 max_distance
, table
);
1267 /* In case of store we want to consider the memory value as available in
1268 the REG stored in that memory. This makes it possible to remove
1269 redundant loads from due to stores to the same location. */
1270 else if (flag_gcse_las
&& REG_P (src
) && MEM_P (dest
))
1272 unsigned int regno
= REGNO (src
);
1273 int max_distance
= 0;
1275 /* Only record sets of pseudo-regs in the hash table. */
1276 if (regno
>= FIRST_PSEUDO_REGISTER
1277 /* Don't GCSE something if we can't do a reg/reg copy. */
1278 && can_copy_p (GET_MODE (src
))
1279 /* GCSE commonly inserts instruction after the insn. We can't
1280 do that easily for EH edges so disable GCSE on these for now. */
1281 && !can_throw_internal (insn
)
1282 /* Is SET_DEST something we want to gcse? */
1283 && want_to_gcse_p (dest
, &max_distance
)
1284 /* Don't CSE a nop. */
1285 && ! set_noop_p (set
)
1286 /* Don't GCSE if it has attached REG_EQUIV note.
1287 At this point this only function parameters should have
1288 REG_EQUIV notes and if the argument slot is used somewhere
1289 explicitly, it means address of parameter has been taken,
1290 so we should not extend the lifetime of the pseudo. */
1291 && ((note
= find_reg_note (insn
, REG_EQUIV
, NULL_RTX
)) == 0
1292 || ! MEM_P (XEXP (note
, 0))))
1294 /* Stores are never anticipatable. */
1296 /* An expression is not available if its operands are
1297 subsequently modified, including this insn. It's also not
1298 available if this is a branch, because we can't insert
1299 a set after the branch. */
1300 int avail_p
= oprs_available_p (dest
, insn
)
1303 /* Record the memory expression (DEST) in the hash table. */
1304 insert_expr_in_table (dest
, GET_MODE (dest
), insn
,
1305 antic_p
, avail_p
, max_distance
, table
);
1311 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED
, rtx insn ATTRIBUTE_UNUSED
,
1312 struct hash_table_d
*table ATTRIBUTE_UNUSED
)
1314 /* Currently nothing to do. */
1318 hash_scan_call (rtx x ATTRIBUTE_UNUSED
, rtx insn ATTRIBUTE_UNUSED
,
1319 struct hash_table_d
*table ATTRIBUTE_UNUSED
)
1321 /* Currently nothing to do. */
1324 /* Process INSN and add hash table entries as appropriate. */
1327 hash_scan_insn (rtx insn
, struct hash_table_d
*table
)
1329 rtx pat
= PATTERN (insn
);
1332 /* Pick out the sets of INSN and for other forms of instructions record
1333 what's been modified. */
1335 if (GET_CODE (pat
) == SET
)
1336 hash_scan_set (pat
, insn
, table
);
1338 else if (GET_CODE (pat
) == CLOBBER
)
1339 hash_scan_clobber (pat
, insn
, table
);
1341 else if (GET_CODE (pat
) == CALL
)
1342 hash_scan_call (pat
, insn
, table
);
1344 else if (GET_CODE (pat
) == PARALLEL
)
1345 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
1347 rtx x
= XVECEXP (pat
, 0, i
);
1349 if (GET_CODE (x
) == SET
)
1350 hash_scan_set (x
, insn
, table
);
1351 else if (GET_CODE (x
) == CLOBBER
)
1352 hash_scan_clobber (x
, insn
, table
);
1353 else if (GET_CODE (x
) == CALL
)
1354 hash_scan_call (x
, insn
, table
);
1358 /* Dump the hash table TABLE to file FILE under the name NAME. */
1361 dump_hash_table (FILE *file
, const char *name
, struct hash_table_d
*table
)
1364 /* Flattened out table, so it's printed in proper order. */
1365 struct expr
**flat_table
;
1366 unsigned int *hash_val
;
1369 flat_table
= XCNEWVEC (struct expr
*, table
->n_elems
);
1370 hash_val
= XNEWVEC (unsigned int, table
->n_elems
);
1372 for (i
= 0; i
< (int) table
->size
; i
++)
1373 for (expr
= table
->table
[i
]; expr
!= NULL
; expr
= expr
->next_same_hash
)
1375 flat_table
[expr
->bitmap_index
] = expr
;
1376 hash_val
[expr
->bitmap_index
] = i
;
1379 fprintf (file
, "%s hash table (%d buckets, %d entries)\n",
1380 name
, table
->size
, table
->n_elems
);
1382 for (i
= 0; i
< (int) table
->n_elems
; i
++)
1383 if (flat_table
[i
] != 0)
1385 expr
= flat_table
[i
];
1386 fprintf (file
, "Index %d (hash value %d; max distance %d)\n ",
1387 expr
->bitmap_index
, hash_val
[i
], expr
->max_distance
);
1388 print_rtl (file
, expr
->expr
);
1389 fprintf (file
, "\n");
1392 fprintf (file
, "\n");
1398 /* Record register first/last/block set information for REGNO in INSN.
1400 first_set records the first place in the block where the register
1401 is set and is used to compute "anticipatability".
1403 last_set records the last place in the block where the register
1404 is set and is used to compute "availability".
1406 last_bb records the block for which first_set and last_set are
1407 valid, as a quick test to invalidate them. */
1410 record_last_reg_set_info (rtx insn
, int regno
)
1412 struct reg_avail_info
*info
= ®_avail_info
[regno
];
1413 int luid
= DF_INSN_LUID (insn
);
1415 info
->last_set
= luid
;
1416 if (info
->last_bb
!= current_bb
)
1418 info
->last_bb
= current_bb
;
1419 info
->first_set
= luid
;
1423 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1424 Note we store a pair of elements in the list, so they have to be
1425 taken off pairwise. */
1428 canon_list_insert (rtx dest ATTRIBUTE_UNUSED
, const_rtx x ATTRIBUTE_UNUSED
,
1431 rtx dest_addr
, insn
;
1435 while (GET_CODE (dest
) == SUBREG
1436 || GET_CODE (dest
) == ZERO_EXTRACT
1437 || GET_CODE (dest
) == STRICT_LOW_PART
)
1438 dest
= XEXP (dest
, 0);
1440 /* If DEST is not a MEM, then it will not conflict with a load. Note
1441 that function calls are assumed to clobber memory, but are handled
1447 dest_addr
= get_addr (XEXP (dest
, 0));
1448 dest_addr
= canon_rtx (dest_addr
);
1449 insn
= (rtx
) v_insn
;
1450 bb
= BLOCK_FOR_INSN (insn
)->index
;
1452 pair
= VEC_safe_push (modify_pair
, heap
, canon_modify_mem_list
[bb
], NULL
);
1454 pair
->dest_addr
= dest_addr
;
1457 /* Record memory modification information for INSN. We do not actually care
1458 about the memory location(s) that are set, or even how they are set (consider
1459 a CALL_INSN). We merely need to record which insns modify memory. */
1462 record_last_mem_set_info (rtx insn
)
1464 int bb
= BLOCK_FOR_INSN (insn
)->index
;
1466 /* load_killed_in_block_p will handle the case of calls clobbering
1468 VEC_safe_push (rtx
, heap
, modify_mem_list
[bb
], insn
);
1469 bitmap_set_bit (modify_mem_list_set
, bb
);
1472 bitmap_set_bit (blocks_with_calls
, bb
);
1474 note_stores (PATTERN (insn
), canon_list_insert
, (void*) insn
);
1477 /* Called from compute_hash_table via note_stores to handle one
1478 SET or CLOBBER in an insn. DATA is really the instruction in which
1479 the SET is taking place. */
1482 record_last_set_info (rtx dest
, const_rtx setter ATTRIBUTE_UNUSED
, void *data
)
1484 rtx last_set_insn
= (rtx
) data
;
1486 if (GET_CODE (dest
) == SUBREG
)
1487 dest
= SUBREG_REG (dest
);
1490 record_last_reg_set_info (last_set_insn
, REGNO (dest
));
1491 else if (MEM_P (dest
)
1492 /* Ignore pushes, they clobber nothing. */
1493 && ! push_operand (dest
, GET_MODE (dest
)))
1494 record_last_mem_set_info (last_set_insn
);
1497 /* Top level function to create an expression hash table.
1499 Expression entries are placed in the hash table if
1500 - they are of the form (set (pseudo-reg) src),
1501 - src is something we want to perform GCSE on,
1502 - none of the operands are subsequently modified in the block
1504 Currently src must be a pseudo-reg or a const_int.
1506 TABLE is the table computed. */
1509 compute_hash_table_work (struct hash_table_d
*table
)
1513 /* re-Cache any INSN_LIST nodes we have allocated. */
1514 clear_modify_mem_tables ();
1515 /* Some working arrays used to track first and last set in each block. */
1516 reg_avail_info
= GNEWVEC (struct reg_avail_info
, max_reg_num ());
1518 for (i
= 0; i
< max_reg_num (); ++i
)
1519 reg_avail_info
[i
].last_bb
= NULL
;
1521 FOR_EACH_BB (current_bb
)
1526 /* First pass over the instructions records information used to
1527 determine when registers and memory are first and last set. */
1528 FOR_BB_INSNS (current_bb
, insn
)
1530 if (!NONDEBUG_INSN_P (insn
))
1535 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
1536 if (TEST_HARD_REG_BIT (regs_invalidated_by_call
, regno
))
1537 record_last_reg_set_info (insn
, regno
);
1539 if (! RTL_CONST_OR_PURE_CALL_P (insn
))
1540 record_last_mem_set_info (insn
);
1543 note_stores (PATTERN (insn
), record_last_set_info
, insn
);
1546 /* The next pass builds the hash table. */
1547 FOR_BB_INSNS (current_bb
, insn
)
1548 if (NONDEBUG_INSN_P (insn
))
1549 hash_scan_insn (insn
, table
);
1552 free (reg_avail_info
);
1553 reg_avail_info
= NULL
;
1556 /* Allocate space for the set/expr hash TABLE.
1557 It is used to determine the number of buckets to use. */
1560 alloc_hash_table (struct hash_table_d
*table
)
1564 n
= get_max_insn_count ();
1566 table
->size
= n
/ 4;
1567 if (table
->size
< 11)
1570 /* Attempt to maintain efficient use of hash table.
1571 Making it an odd number is simplest for now.
1572 ??? Later take some measurements. */
1574 n
= table
->size
* sizeof (struct expr
*);
1575 table
->table
= GNEWVAR (struct expr
*, n
);
1578 /* Free things allocated by alloc_hash_table. */
1581 free_hash_table (struct hash_table_d
*table
)
1583 free (table
->table
);
1586 /* Compute the expression hash table TABLE. */
1589 compute_hash_table (struct hash_table_d
*table
)
1591 /* Initialize count of number of entries in hash table. */
1593 memset (table
->table
, 0, table
->size
* sizeof (struct expr
*));
1595 compute_hash_table_work (table
);
1598 /* Expression tracking support. */
1600 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1602 clear_modify_mem_tables (void)
1607 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set
, 0, i
, bi
)
1609 VEC_free (rtx
, heap
, modify_mem_list
[i
]);
1610 VEC_free (modify_pair
, heap
, canon_modify_mem_list
[i
]);
1612 bitmap_clear (modify_mem_list_set
);
1613 bitmap_clear (blocks_with_calls
);
1616 /* Release memory used by modify_mem_list_set. */
1619 free_modify_mem_tables (void)
1621 clear_modify_mem_tables ();
1622 free (modify_mem_list
);
1623 free (canon_modify_mem_list
);
1624 modify_mem_list
= 0;
1625 canon_modify_mem_list
= 0;
1628 /* For each block, compute whether X is transparent. X is either an
1629 expression or an assignment [though we don't care which, for this context
1630 an assignment is treated as an expression]. For each block where an
1631 element of X is modified, reset the INDX bit in BMAP. */
1634 compute_transp (const_rtx x
, int indx
, sbitmap
*bmap
)
1640 /* repeat is used to turn tail-recursion into iteration since GCC
1641 can't do it when there's no return value. */
1647 code
= GET_CODE (x
);
1653 for (def
= DF_REG_DEF_CHAIN (REGNO (x
));
1655 def
= DF_REF_NEXT_REG (def
))
1656 RESET_BIT (bmap
[DF_REF_BB (def
)->index
], indx
);
1662 if (! MEM_READONLY_P (x
))
1668 x_addr
= get_addr (XEXP (x
, 0));
1669 x_addr
= canon_rtx (x_addr
);
1671 /* First handle all the blocks with calls. We don't need to
1672 do any list walking for them. */
1673 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls
, 0, bb_index
, bi
)
1675 RESET_BIT (bmap
[bb_index
], indx
);
1678 /* Now iterate over the blocks which have memory modifications
1679 but which do not have any calls. */
1680 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set
,
1684 VEC (modify_pair
,heap
) *list
1685 = canon_modify_mem_list
[bb_index
];
1689 FOR_EACH_VEC_ELT_REVERSE (modify_pair
, list
, ix
, pair
)
1691 rtx dest
= pair
->dest
;
1692 rtx dest_addr
= pair
->dest_addr
;
1694 if (canon_true_dependence (dest
, GET_MODE (dest
),
1695 dest_addr
, x
, x_addr
))
1696 RESET_BIT (bmap
[bb_index
], indx
);
1721 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
1725 /* If we are about to do the last recursive call
1726 needed at this level, change it into iteration.
1727 This function is called enough to be worth it. */
1734 compute_transp (XEXP (x
, i
), indx
, bmap
);
1736 else if (fmt
[i
] == 'E')
1737 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1738 compute_transp (XVECEXP (x
, i
, j
), indx
, bmap
);
1742 /* Compute PRE+LCM working variables. */
1744 /* Local properties of expressions. */
1746 /* Nonzero for expressions that are transparent in the block. */
1747 static sbitmap
*transp
;
1749 /* Nonzero for expressions that are computed (available) in the block. */
1750 static sbitmap
*comp
;
1752 /* Nonzero for expressions that are locally anticipatable in the block. */
1753 static sbitmap
*antloc
;
1755 /* Nonzero for expressions where this block is an optimal computation
1757 static sbitmap
*pre_optimal
;
1759 /* Nonzero for expressions which are redundant in a particular block. */
1760 static sbitmap
*pre_redundant
;
1762 /* Nonzero for expressions which should be inserted on a specific edge. */
1763 static sbitmap
*pre_insert_map
;
1765 /* Nonzero for expressions which should be deleted in a specific block. */
1766 static sbitmap
*pre_delete_map
;
1768 /* Allocate vars used for PRE analysis. */
1771 alloc_pre_mem (int n_blocks
, int n_exprs
)
1773 transp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
1774 comp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
1775 antloc
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
1778 pre_redundant
= NULL
;
1779 pre_insert_map
= NULL
;
1780 pre_delete_map
= NULL
;
1781 ae_kill
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
1783 /* pre_insert and pre_delete are allocated later. */
1786 /* Free vars used for PRE analysis. */
1791 sbitmap_vector_free (transp
);
1792 sbitmap_vector_free (comp
);
1794 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1797 sbitmap_vector_free (pre_optimal
);
1799 sbitmap_vector_free (pre_redundant
);
1801 sbitmap_vector_free (pre_insert_map
);
1803 sbitmap_vector_free (pre_delete_map
);
1805 transp
= comp
= NULL
;
1806 pre_optimal
= pre_redundant
= pre_insert_map
= pre_delete_map
= NULL
;
1809 /* Remove certain expressions from anticipatable and transparent
1810 sets of basic blocks that have incoming abnormal edge.
1811 For PRE remove potentially trapping expressions to avoid placing
1812 them on abnormal edges. For hoisting remove memory references that
1813 can be clobbered by calls. */
1816 prune_expressions (bool pre_p
)
1818 sbitmap prune_exprs
;
1823 prune_exprs
= sbitmap_alloc (expr_hash_table
.n_elems
);
1824 sbitmap_zero (prune_exprs
);
1825 for (ui
= 0; ui
< expr_hash_table
.size
; ui
++)
1827 for (expr
= expr_hash_table
.table
[ui
]; expr
; expr
= expr
->next_same_hash
)
1829 /* Note potentially trapping expressions. */
1830 if (may_trap_p (expr
->expr
))
1832 SET_BIT (prune_exprs
, expr
->bitmap_index
);
1836 if (!pre_p
&& MEM_P (expr
->expr
))
1837 /* Note memory references that can be clobbered by a call.
1838 We do not split abnormal edges in hoisting, so would
1839 a memory reference get hoisted along an abnormal edge,
1840 it would be placed /before/ the call. Therefore, only
1841 constant memory references can be hoisted along abnormal
1844 if (GET_CODE (XEXP (expr
->expr
, 0)) == SYMBOL_REF
1845 && CONSTANT_POOL_ADDRESS_P (XEXP (expr
->expr
, 0)))
1848 if (MEM_READONLY_P (expr
->expr
)
1849 && !MEM_VOLATILE_P (expr
->expr
)
1850 && MEM_NOTRAP_P (expr
->expr
))
1851 /* Constant memory reference, e.g., a PIC address. */
1854 /* ??? Optimally, we would use interprocedural alias
1855 analysis to determine if this mem is actually killed
1858 SET_BIT (prune_exprs
, expr
->bitmap_index
);
1868 /* If the current block is the destination of an abnormal edge, we
1869 kill all trapping (for PRE) and memory (for hoist) expressions
1870 because we won't be able to properly place the instruction on
1871 the edge. So make them neither anticipatable nor transparent.
1872 This is fairly conservative.
1874 ??? For hoisting it may be necessary to check for set-and-jump
1875 instructions here, not just for abnormal edges. The general problem
1876 is that when an expression cannot not be placed right at the end of
1877 a basic block we should account for any side-effects of a subsequent
1878 jump instructions that could clobber the expression. It would
1879 be best to implement this check along the lines of
1880 hoist_expr_reaches_here_p where the target block is already known
1881 and, hence, there's no need to conservatively prune expressions on
1882 "intermediate" set-and-jump instructions. */
1883 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1884 if ((e
->flags
& EDGE_ABNORMAL
)
1885 && (pre_p
|| CALL_P (BB_END (e
->src
))))
1887 sbitmap_difference (antloc
[bb
->index
],
1888 antloc
[bb
->index
], prune_exprs
);
1889 sbitmap_difference (transp
[bb
->index
],
1890 transp
[bb
->index
], prune_exprs
);
1895 sbitmap_free (prune_exprs
);
1898 /* It may be necessary to insert a large number of insns on edges to
1899 make the existing occurrences of expressions fully redundant. This
1900 routine examines the set of insertions and deletions and if the ratio
1901 of insertions to deletions is too high for a particular expression, then
1902 the expression is removed from the insertion/deletion sets.
1904 N_ELEMS is the number of elements in the hash table. */
1907 prune_insertions_deletions (int n_elems
)
1909 sbitmap_iterator sbi
;
1910 sbitmap prune_exprs
;
1912 /* We always use I to iterate over blocks/edges and J to iterate over
1916 /* Counts for the number of times an expression needs to be inserted and
1917 number of times an expression can be removed as a result. */
1918 int *insertions
= GCNEWVEC (int, n_elems
);
1919 int *deletions
= GCNEWVEC (int, n_elems
);
1921 /* Set of expressions which require too many insertions relative to
1922 the number of deletions achieved. We will prune these out of the
1923 insertion/deletion sets. */
1924 prune_exprs
= sbitmap_alloc (n_elems
);
1925 sbitmap_zero (prune_exprs
);
1927 /* Iterate over the edges counting the number of times each expression
1928 needs to be inserted. */
1929 for (i
= 0; i
< (unsigned) n_edges
; i
++)
1931 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map
[i
], 0, j
, sbi
)
1935 /* Similarly for deletions, but those occur in blocks rather than on
1937 for (i
= 0; i
< (unsigned) last_basic_block
; i
++)
1939 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map
[i
], 0, j
, sbi
)
1943 /* Now that we have accurate counts, iterate over the elements in the
1944 hash table and see if any need too many insertions relative to the
1945 number of evaluations that can be removed. If so, mark them in
1947 for (j
= 0; j
< (unsigned) n_elems
; j
++)
1949 && ((unsigned) insertions
[j
] / deletions
[j
]) > MAX_GCSE_INSERTION_RATIO
)
1950 SET_BIT (prune_exprs
, j
);
1952 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1953 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs
, 0, j
, sbi
)
1955 for (i
= 0; i
< (unsigned) n_edges
; i
++)
1956 RESET_BIT (pre_insert_map
[i
], j
);
1958 for (i
= 0; i
< (unsigned) last_basic_block
; i
++)
1959 RESET_BIT (pre_delete_map
[i
], j
);
1962 sbitmap_free (prune_exprs
);
1967 /* Top level routine to do the dataflow analysis needed by PRE. */
1969 static struct edge_list
*
1970 compute_pre_data (void)
1972 struct edge_list
*edge_list
;
1975 compute_local_properties (transp
, comp
, antloc
, &expr_hash_table
);
1976 prune_expressions (true);
1977 sbitmap_vector_zero (ae_kill
, last_basic_block
);
1979 /* Compute ae_kill for each basic block using:
1986 sbitmap_a_or_b (ae_kill
[bb
->index
], transp
[bb
->index
], comp
[bb
->index
]);
1987 sbitmap_not (ae_kill
[bb
->index
], ae_kill
[bb
->index
]);
1990 edge_list
= pre_edge_lcm (expr_hash_table
.n_elems
, transp
, comp
, antloc
,
1991 ae_kill
, &pre_insert_map
, &pre_delete_map
);
1992 sbitmap_vector_free (antloc
);
1994 sbitmap_vector_free (ae_kill
);
1997 prune_insertions_deletions (expr_hash_table
.n_elems
);
2004 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2007 VISITED is a pointer to a working buffer for tracking which BB's have
2008 been visited. It is NULL for the top-level call.
2010 We treat reaching expressions that go through blocks containing the same
2011 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2012 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2013 2 as not reaching. The intent is to improve the probability of finding
2014 only one reaching expression and to reduce register lifetimes by picking
2015 the closest such expression. */
2018 pre_expr_reaches_here_p_work (basic_block occr_bb
, struct expr
*expr
,
2019 basic_block bb
, char *visited
)
2024 FOR_EACH_EDGE (pred
, ei
, bb
->preds
)
2026 basic_block pred_bb
= pred
->src
;
2028 if (pred
->src
== ENTRY_BLOCK_PTR
2029 /* Has predecessor has already been visited? */
2030 || visited
[pred_bb
->index
])
2031 ;/* Nothing to do. */
2033 /* Does this predecessor generate this expression? */
2034 else if (TEST_BIT (comp
[pred_bb
->index
], expr
->bitmap_index
))
2036 /* Is this the occurrence we're looking for?
2037 Note that there's only one generating occurrence per block
2038 so we just need to check the block number. */
2039 if (occr_bb
== pred_bb
)
2042 visited
[pred_bb
->index
] = 1;
2044 /* Ignore this predecessor if it kills the expression. */
2045 else if (! TEST_BIT (transp
[pred_bb
->index
], expr
->bitmap_index
))
2046 visited
[pred_bb
->index
] = 1;
2048 /* Neither gen nor kill. */
2051 visited
[pred_bb
->index
] = 1;
2052 if (pre_expr_reaches_here_p_work (occr_bb
, expr
, pred_bb
, visited
))
2057 /* All paths have been checked. */
2061 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2062 memory allocated for that function is returned. */
2065 pre_expr_reaches_here_p (basic_block occr_bb
, struct expr
*expr
, basic_block bb
)
2068 char *visited
= XCNEWVEC (char, last_basic_block
);
2070 rval
= pre_expr_reaches_here_p_work (occr_bb
, expr
, bb
, visited
);
2076 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2079 process_insert_insn (struct expr
*expr
)
2081 rtx reg
= expr
->reaching_reg
;
2082 /* Copy the expression to make sure we don't have any sharing issues. */
2083 rtx exp
= copy_rtx (expr
->expr
);
2088 /* If the expression is something that's an operand, like a constant,
2089 just copy it to a register. */
2090 if (general_operand (exp
, GET_MODE (reg
)))
2091 emit_move_insn (reg
, exp
);
2093 /* Otherwise, make a new insn to compute this expression and make sure the
2094 insn will be recognized (this also adds any needed CLOBBERs). */
2097 rtx insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, exp
));
2099 if (insn_invalid_p (insn
))
2109 /* Add EXPR to the end of basic block BB.
2111 This is used by both the PRE and code hoisting. */
2114 insert_insn_end_basic_block (struct expr
*expr
, basic_block bb
)
2116 rtx insn
= BB_END (bb
);
2118 rtx reg
= expr
->reaching_reg
;
2119 int regno
= REGNO (reg
);
2122 pat
= process_insert_insn (expr
);
2123 gcc_assert (pat
&& INSN_P (pat
));
2126 while (NEXT_INSN (pat_end
) != NULL_RTX
)
2127 pat_end
= NEXT_INSN (pat_end
);
2129 /* If the last insn is a jump, insert EXPR in front [taking care to
2130 handle cc0, etc. properly]. Similarly we need to care trapping
2131 instructions in presence of non-call exceptions. */
2134 || (NONJUMP_INSN_P (insn
)
2135 && (!single_succ_p (bb
)
2136 || single_succ_edge (bb
)->flags
& EDGE_ABNORMAL
)))
2142 /* If this is a jump table, then we can't insert stuff here. Since
2143 we know the previous real insn must be the tablejump, we insert
2144 the new instruction just before the tablejump. */
2145 if (GET_CODE (PATTERN (insn
)) == ADDR_VEC
2146 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
)
2147 insn
= prev_active_insn (insn
);
2150 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2151 if cc0 isn't set. */
2152 note
= find_reg_note (insn
, REG_CC_SETTER
, NULL_RTX
);
2154 insn
= XEXP (note
, 0);
2157 rtx maybe_cc0_setter
= prev_nonnote_insn (insn
);
2158 if (maybe_cc0_setter
2159 && INSN_P (maybe_cc0_setter
)
2160 && sets_cc0_p (PATTERN (maybe_cc0_setter
)))
2161 insn
= maybe_cc0_setter
;
2164 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2165 new_insn
= emit_insn_before_noloc (pat
, insn
, bb
);
2168 /* Likewise if the last insn is a call, as will happen in the presence
2169 of exception handling. */
2170 else if (CALL_P (insn
)
2171 && (!single_succ_p (bb
)
2172 || single_succ_edge (bb
)->flags
& EDGE_ABNORMAL
))
2174 /* Keeping in mind targets with small register classes and parameters
2175 in registers, we search backward and place the instructions before
2176 the first parameter is loaded. Do this for everyone for consistency
2177 and a presumption that we'll get better code elsewhere as well. */
2179 /* Since different machines initialize their parameter registers
2180 in different orders, assume nothing. Collect the set of all
2181 parameter registers. */
2182 insn
= find_first_parameter_load (insn
, BB_HEAD (bb
));
2184 /* If we found all the parameter loads, then we want to insert
2185 before the first parameter load.
2187 If we did not find all the parameter loads, then we might have
2188 stopped on the head of the block, which could be a CODE_LABEL.
2189 If we inserted before the CODE_LABEL, then we would be putting
2190 the insn in the wrong basic block. In that case, put the insn
2191 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2192 while (LABEL_P (insn
)
2193 || NOTE_INSN_BASIC_BLOCK_P (insn
))
2194 insn
= NEXT_INSN (insn
);
2196 new_insn
= emit_insn_before_noloc (pat
, insn
, bb
);
2199 new_insn
= emit_insn_after_noloc (pat
, insn
, bb
);
2204 add_label_notes (PATTERN (pat
), new_insn
);
2207 pat
= NEXT_INSN (pat
);
2210 gcse_create_count
++;
2214 fprintf (dump_file
, "PRE/HOIST: end of bb %d, insn %d, ",
2215 bb
->index
, INSN_UID (new_insn
));
2216 fprintf (dump_file
, "copying expression %d to reg %d\n",
2217 expr
->bitmap_index
, regno
);
2221 /* Insert partially redundant expressions on edges in the CFG to make
2222 the expressions fully redundant. */
2225 pre_edge_insert (struct edge_list
*edge_list
, struct expr
**index_map
)
2227 int e
, i
, j
, num_edges
, set_size
, did_insert
= 0;
2230 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2231 if it reaches any of the deleted expressions. */
2233 set_size
= pre_insert_map
[0]->size
;
2234 num_edges
= NUM_EDGES (edge_list
);
2235 inserted
= sbitmap_vector_alloc (num_edges
, expr_hash_table
.n_elems
);
2236 sbitmap_vector_zero (inserted
, num_edges
);
2238 for (e
= 0; e
< num_edges
; e
++)
2241 basic_block bb
= INDEX_EDGE_PRED_BB (edge_list
, e
);
2243 for (i
= indx
= 0; i
< set_size
; i
++, indx
+= SBITMAP_ELT_BITS
)
2245 SBITMAP_ELT_TYPE insert
= pre_insert_map
[e
]->elms
[i
];
2248 insert
&& j
< (int) expr_hash_table
.n_elems
;
2250 if ((insert
& 1) != 0 && index_map
[j
]->reaching_reg
!= NULL_RTX
)
2252 struct expr
*expr
= index_map
[j
];
2255 /* Now look at each deleted occurrence of this expression. */
2256 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
2258 if (! occr
->deleted_p
)
2261 /* Insert this expression on this edge if it would
2262 reach the deleted occurrence in BB. */
2263 if (!TEST_BIT (inserted
[e
], j
))
2266 edge eg
= INDEX_EDGE (edge_list
, e
);
2268 /* We can't insert anything on an abnormal and
2269 critical edge, so we insert the insn at the end of
2270 the previous block. There are several alternatives
2271 detailed in Morgans book P277 (sec 10.5) for
2272 handling this situation. This one is easiest for
2275 if (eg
->flags
& EDGE_ABNORMAL
)
2276 insert_insn_end_basic_block (index_map
[j
], bb
);
2279 insn
= process_insert_insn (index_map
[j
]);
2280 insert_insn_on_edge (insn
, eg
);
2285 fprintf (dump_file
, "PRE: edge (%d,%d), ",
2287 INDEX_EDGE_SUCC_BB (edge_list
, e
)->index
);
2288 fprintf (dump_file
, "copy expression %d\n",
2289 expr
->bitmap_index
);
2292 update_ld_motion_stores (expr
);
2293 SET_BIT (inserted
[e
], j
);
2295 gcse_create_count
++;
2302 sbitmap_vector_free (inserted
);
2306 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2307 Given "old_reg <- expr" (INSN), instead of adding after it
2308 reaching_reg <- old_reg
2309 it's better to do the following:
2310 reaching_reg <- expr
2311 old_reg <- reaching_reg
2312 because this way copy propagation can discover additional PRE
2313 opportunities. But if this fails, we try the old way.
2314 When "expr" is a store, i.e.
2315 given "MEM <- old_reg", instead of adding after it
2316 reaching_reg <- old_reg
2317 it's better to add it before as follows:
2318 reaching_reg <- old_reg
2319 MEM <- reaching_reg. */
2322 pre_insert_copy_insn (struct expr
*expr
, rtx insn
)
2324 rtx reg
= expr
->reaching_reg
;
2325 int regno
= REGNO (reg
);
2326 int indx
= expr
->bitmap_index
;
2327 rtx pat
= PATTERN (insn
);
2328 rtx set
, first_set
, new_insn
;
2332 /* This block matches the logic in hash_scan_insn. */
2333 switch (GET_CODE (pat
))
2340 /* Search through the parallel looking for the set whose
2341 source was the expression that we're interested in. */
2342 first_set
= NULL_RTX
;
2344 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
2346 rtx x
= XVECEXP (pat
, 0, i
);
2347 if (GET_CODE (x
) == SET
)
2349 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2350 may not find an equivalent expression, but in this
2351 case the PARALLEL will have a single set. */
2352 if (first_set
== NULL_RTX
)
2354 if (expr_equiv_p (SET_SRC (x
), expr
->expr
))
2362 gcc_assert (first_set
);
2363 if (set
== NULL_RTX
)
2371 if (REG_P (SET_DEST (set
)))
2373 old_reg
= SET_DEST (set
);
2374 /* Check if we can modify the set destination in the original insn. */
2375 if (validate_change (insn
, &SET_DEST (set
), reg
, 0))
2377 new_insn
= gen_move_insn (old_reg
, reg
);
2378 new_insn
= emit_insn_after (new_insn
, insn
);
2382 new_insn
= gen_move_insn (reg
, old_reg
);
2383 new_insn
= emit_insn_after (new_insn
, insn
);
2386 else /* This is possible only in case of a store to memory. */
2388 old_reg
= SET_SRC (set
);
2389 new_insn
= gen_move_insn (reg
, old_reg
);
2391 /* Check if we can modify the set source in the original insn. */
2392 if (validate_change (insn
, &SET_SRC (set
), reg
, 0))
2393 new_insn
= emit_insn_before (new_insn
, insn
);
2395 new_insn
= emit_insn_after (new_insn
, insn
);
2398 gcse_create_count
++;
2402 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2403 BLOCK_FOR_INSN (insn
)->index
, INSN_UID (new_insn
), indx
,
2404 INSN_UID (insn
), regno
);
2407 /* Copy available expressions that reach the redundant expression
2408 to `reaching_reg'. */
2411 pre_insert_copies (void)
2413 unsigned int i
, added_copy
;
2418 /* For each available expression in the table, copy the result to
2419 `reaching_reg' if the expression reaches a deleted one.
2421 ??? The current algorithm is rather brute force.
2422 Need to do some profiling. */
2424 for (i
= 0; i
< expr_hash_table
.size
; i
++)
2425 for (expr
= expr_hash_table
.table
[i
]; expr
; expr
= expr
->next_same_hash
)
2427 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2428 we don't want to insert a copy here because the expression may not
2429 really be redundant. So only insert an insn if the expression was
2430 deleted. This test also avoids further processing if the
2431 expression wasn't deleted anywhere. */
2432 if (expr
->reaching_reg
== NULL
)
2435 /* Set when we add a copy for that expression. */
2438 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
2440 if (! occr
->deleted_p
)
2443 for (avail
= expr
->avail_occr
; avail
!= NULL
; avail
= avail
->next
)
2445 rtx insn
= avail
->insn
;
2447 /* No need to handle this one if handled already. */
2448 if (avail
->copied_p
)
2451 /* Don't handle this one if it's a redundant one. */
2452 if (INSN_DELETED_P (insn
))
2455 /* Or if the expression doesn't reach the deleted one. */
2456 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail
->insn
),
2458 BLOCK_FOR_INSN (occr
->insn
)))
2463 /* Copy the result of avail to reaching_reg. */
2464 pre_insert_copy_insn (expr
, insn
);
2465 avail
->copied_p
= 1;
2470 update_ld_motion_stores (expr
);
2474 /* Emit move from SRC to DEST noting the equivalence with expression computed
2478 gcse_emit_move_after (rtx dest
, rtx src
, rtx insn
)
2481 rtx set
= single_set (insn
), set2
;
2485 /* This should never fail since we're creating a reg->reg copy
2486 we've verified to be valid. */
2488 new_rtx
= emit_insn_after (gen_move_insn (dest
, src
), insn
);
2490 /* Note the equivalence for local CSE pass. */
2491 set2
= single_set (new_rtx
);
2492 if (!set2
|| !rtx_equal_p (SET_DEST (set2
), dest
))
2494 if ((note
= find_reg_equal_equiv_note (insn
)))
2495 eqv
= XEXP (note
, 0);
2497 eqv
= SET_SRC (set
);
2499 set_unique_reg_note (new_rtx
, REG_EQUAL
, copy_insn_1 (eqv
));
2504 /* Delete redundant computations.
2505 Deletion is done by changing the insn to copy the `reaching_reg' of
2506 the expression into the result of the SET. It is left to later passes
2507 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2509 Return nonzero if a change is made. */
2520 for (i
= 0; i
< expr_hash_table
.size
; i
++)
2521 for (expr
= expr_hash_table
.table
[i
]; expr
; expr
= expr
->next_same_hash
)
2523 int indx
= expr
->bitmap_index
;
2525 /* We only need to search antic_occr since we require ANTLOC != 0. */
2526 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
2528 rtx insn
= occr
->insn
;
2530 basic_block bb
= BLOCK_FOR_INSN (insn
);
2532 /* We only delete insns that have a single_set. */
2533 if (TEST_BIT (pre_delete_map
[bb
->index
], indx
)
2534 && (set
= single_set (insn
)) != 0
2535 && dbg_cnt (pre_insn
))
2537 /* Create a pseudo-reg to store the result of reaching
2538 expressions into. Get the mode for the new pseudo from
2539 the mode of the original destination pseudo. */
2540 if (expr
->reaching_reg
== NULL
)
2541 expr
->reaching_reg
= gen_reg_rtx_and_attrs (SET_DEST (set
));
2543 gcse_emit_move_after (SET_DEST (set
), expr
->reaching_reg
, insn
);
2545 occr
->deleted_p
= 1;
2552 "PRE: redundant insn %d (expression %d) in ",
2553 INSN_UID (insn
), indx
);
2554 fprintf (dump_file
, "bb %d, reaching reg is %d\n",
2555 bb
->index
, REGNO (expr
->reaching_reg
));
2564 /* Perform GCSE optimizations using PRE.
2565 This is called by one_pre_gcse_pass after all the dataflow analysis
2568 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2569 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2570 Compiler Design and Implementation.
2572 ??? A new pseudo reg is created to hold the reaching expression. The nice
2573 thing about the classical approach is that it would try to use an existing
2574 reg. If the register can't be adequately optimized [i.e. we introduce
2575 reload problems], one could add a pass here to propagate the new register
2578 ??? We don't handle single sets in PARALLELs because we're [currently] not
2579 able to copy the rest of the parallel when we insert copies to create full
2580 redundancies from partial redundancies. However, there's no reason why we
2581 can't handle PARALLELs in the cases where there are no partial
2585 pre_gcse (struct edge_list
*edge_list
)
2588 int did_insert
, changed
;
2589 struct expr
**index_map
;
2592 /* Compute a mapping from expression number (`bitmap_index') to
2593 hash table entry. */
2595 index_map
= XCNEWVEC (struct expr
*, expr_hash_table
.n_elems
);
2596 for (i
= 0; i
< expr_hash_table
.size
; i
++)
2597 for (expr
= expr_hash_table
.table
[i
]; expr
; expr
= expr
->next_same_hash
)
2598 index_map
[expr
->bitmap_index
] = expr
;
2600 /* Delete the redundant insns first so that
2601 - we know what register to use for the new insns and for the other
2602 ones with reaching expressions
2603 - we know which insns are redundant when we go to create copies */
2605 changed
= pre_delete ();
2606 did_insert
= pre_edge_insert (edge_list
, index_map
);
2608 /* In other places with reaching expressions, copy the expression to the
2609 specially allocated pseudo-reg that reaches the redundant expr. */
2610 pre_insert_copies ();
2613 commit_edge_insertions ();
2621 /* Top level routine to perform one PRE GCSE pass.
2623 Return nonzero if a change was made. */
2626 one_pre_gcse_pass (void)
2630 gcse_subst_count
= 0;
2631 gcse_create_count
= 0;
2633 /* Return if there's nothing to do, or it is too expensive. */
2634 if (n_basic_blocks
<= NUM_FIXED_BLOCKS
+ 1
2635 || is_too_expensive (_("PRE disabled")))
2638 /* We need alias. */
2639 init_alias_analysis ();
2642 gcc_obstack_init (&gcse_obstack
);
2645 alloc_hash_table (&expr_hash_table
);
2646 add_noreturn_fake_exit_edges ();
2648 compute_ld_motion_mems ();
2650 compute_hash_table (&expr_hash_table
);
2652 trim_ld_motion_mems ();
2654 dump_hash_table (dump_file
, "Expression", &expr_hash_table
);
2656 if (expr_hash_table
.n_elems
> 0)
2658 struct edge_list
*edge_list
;
2659 alloc_pre_mem (last_basic_block
, expr_hash_table
.n_elems
);
2660 edge_list
= compute_pre_data ();
2661 changed
|= pre_gcse (edge_list
);
2662 free_edge_list (edge_list
);
2667 free_ld_motion_mems ();
2668 remove_fake_exit_edges ();
2669 free_hash_table (&expr_hash_table
);
2672 obstack_free (&gcse_obstack
, NULL
);
2674 /* We are finished with alias. */
2675 end_alias_analysis ();
2679 fprintf (dump_file
, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2680 current_function_name (), n_basic_blocks
, bytes_used
);
2681 fprintf (dump_file
, "%d substs, %d insns created\n",
2682 gcse_subst_count
, gcse_create_count
);
2688 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2689 to INSN. If such notes are added to an insn which references a
2690 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2691 that note, because the following loop optimization pass requires
2694 /* ??? If there was a jump optimization pass after gcse and before loop,
2695 then we would not need to do this here, because jump would add the
2696 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2699 add_label_notes (rtx x
, rtx insn
)
2701 enum rtx_code code
= GET_CODE (x
);
2705 if (code
== LABEL_REF
&& !LABEL_REF_NONLOCAL_P (x
))
2707 /* This code used to ignore labels that referred to dispatch tables to
2708 avoid flow generating (slightly) worse code.
2710 We no longer ignore such label references (see LABEL_REF handling in
2711 mark_jump_label for additional information). */
2713 /* There's no reason for current users to emit jump-insns with
2714 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2716 gcc_assert (!JUMP_P (insn
));
2717 add_reg_note (insn
, REG_LABEL_OPERAND
, XEXP (x
, 0));
2719 if (LABEL_P (XEXP (x
, 0)))
2720 LABEL_NUSES (XEXP (x
, 0))++;
2725 for (i
= GET_RTX_LENGTH (code
) - 1, fmt
= GET_RTX_FORMAT (code
); i
>= 0; i
--)
2728 add_label_notes (XEXP (x
, i
), insn
);
2729 else if (fmt
[i
] == 'E')
2730 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
2731 add_label_notes (XVECEXP (x
, i
, j
), insn
);
2735 /* Code Hoisting variables and subroutines. */
2737 /* Very busy expressions. */
2738 static sbitmap
*hoist_vbein
;
2739 static sbitmap
*hoist_vbeout
;
2741 /* ??? We could compute post dominators and run this algorithm in
2742 reverse to perform tail merging, doing so would probably be
2743 more effective than the tail merging code in jump.c.
2745 It's unclear if tail merging could be run in parallel with
2746 code hoisting. It would be nice. */
2748 /* Allocate vars used for code hoisting analysis. */
2751 alloc_code_hoist_mem (int n_blocks
, int n_exprs
)
2753 antloc
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
2754 transp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
2755 comp
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
2757 hoist_vbein
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
2758 hoist_vbeout
= sbitmap_vector_alloc (n_blocks
, n_exprs
);
2761 /* Free vars used for code hoisting analysis. */
2764 free_code_hoist_mem (void)
2766 sbitmap_vector_free (antloc
);
2767 sbitmap_vector_free (transp
);
2768 sbitmap_vector_free (comp
);
2770 sbitmap_vector_free (hoist_vbein
);
2771 sbitmap_vector_free (hoist_vbeout
);
2773 free_dominance_info (CDI_DOMINATORS
);
2776 /* Compute the very busy expressions at entry/exit from each block.
2778 An expression is very busy if all paths from a given point
2779 compute the expression. */
2782 compute_code_hoist_vbeinout (void)
2784 int changed
, passes
;
2787 sbitmap_vector_zero (hoist_vbeout
, last_basic_block
);
2788 sbitmap_vector_zero (hoist_vbein
, last_basic_block
);
2797 /* We scan the blocks in the reverse order to speed up
2799 FOR_EACH_BB_REVERSE (bb
)
2801 if (bb
->next_bb
!= EXIT_BLOCK_PTR
)
2803 sbitmap_intersection_of_succs (hoist_vbeout
[bb
->index
],
2804 hoist_vbein
, bb
->index
);
2806 /* Include expressions in VBEout that are calculated
2807 in BB and available at its end. */
2808 sbitmap_a_or_b (hoist_vbeout
[bb
->index
],
2809 hoist_vbeout
[bb
->index
], comp
[bb
->index
]);
2812 changed
|= sbitmap_a_or_b_and_c_cg (hoist_vbein
[bb
->index
],
2814 hoist_vbeout
[bb
->index
],
2823 fprintf (dump_file
, "hoisting vbeinout computation: %d passes\n", passes
);
2827 fprintf (dump_file
, "vbein (%d): ", bb
->index
);
2828 dump_sbitmap_file (dump_file
, hoist_vbein
[bb
->index
]);
2829 fprintf (dump_file
, "vbeout(%d): ", bb
->index
);
2830 dump_sbitmap_file (dump_file
, hoist_vbeout
[bb
->index
]);
2835 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2838 compute_code_hoist_data (void)
2840 compute_local_properties (transp
, comp
, antloc
, &expr_hash_table
);
2841 prune_expressions (false);
2842 compute_code_hoist_vbeinout ();
2843 calculate_dominance_info (CDI_DOMINATORS
);
2845 fprintf (dump_file
, "\n");
2848 /* Determine if the expression identified by EXPR_INDEX would
2849 reach BB unimpared if it was placed at the end of EXPR_BB.
2850 Stop the search if the expression would need to be moved more
2851 than DISTANCE instructions.
2853 It's unclear exactly what Muchnick meant by "unimpared". It seems
2854 to me that the expression must either be computed or transparent in
2855 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2856 would allow the expression to be hoisted out of loops, even if
2857 the expression wasn't a loop invariant.
2859 Contrast this to reachability for PRE where an expression is
2860 considered reachable if *any* path reaches instead of *all*
2864 hoist_expr_reaches_here_p (basic_block expr_bb
, int expr_index
, basic_block bb
,
2865 char *visited
, int distance
, int *bb_size
)
2869 int visited_allocated_locally
= 0;
2871 /* Terminate the search if distance, for which EXPR is allowed to move,
2875 distance
-= bb_size
[bb
->index
];
2881 gcc_assert (distance
== 0);
2883 if (visited
== NULL
)
2885 visited_allocated_locally
= 1;
2886 visited
= XCNEWVEC (char, last_basic_block
);
2889 FOR_EACH_EDGE (pred
, ei
, bb
->preds
)
2891 basic_block pred_bb
= pred
->src
;
2893 if (pred
->src
== ENTRY_BLOCK_PTR
)
2895 else if (pred_bb
== expr_bb
)
2897 else if (visited
[pred_bb
->index
])
2900 else if (! TEST_BIT (transp
[pred_bb
->index
], expr_index
))
2906 visited
[pred_bb
->index
] = 1;
2907 if (! hoist_expr_reaches_here_p (expr_bb
, expr_index
, pred_bb
,
2908 visited
, distance
, bb_size
))
2912 if (visited_allocated_locally
)
2915 return (pred
== NULL
);
2918 /* Find occurence in BB. */
2920 static struct occr
*
2921 find_occr_in_bb (struct occr
*occr
, basic_block bb
)
2923 /* Find the right occurrence of this expression. */
2924 while (occr
&& BLOCK_FOR_INSN (occr
->insn
) != bb
)
2930 /* Actually perform code hoisting. */
2935 basic_block bb
, dominated
;
2936 VEC (basic_block
, heap
) *dom_tree_walk
;
2937 unsigned int dom_tree_walk_index
;
2938 VEC (basic_block
, heap
) *domby
;
2940 struct expr
**index_map
;
2946 /* Compute a mapping from expression number (`bitmap_index') to
2947 hash table entry. */
2949 index_map
= XCNEWVEC (struct expr
*, expr_hash_table
.n_elems
);
2950 for (i
= 0; i
< expr_hash_table
.size
; i
++)
2951 for (expr
= expr_hash_table
.table
[i
]; expr
; expr
= expr
->next_same_hash
)
2952 index_map
[expr
->bitmap_index
] = expr
;
2954 /* Calculate sizes of basic blocks and note how far
2955 each instruction is from the start of its block. We then use this
2956 data to restrict distance an expression can travel. */
2958 to_bb_head
= XCNEWVEC (int, get_max_uid ());
2959 bb_size
= XCNEWVEC (int, last_basic_block
);
2967 FOR_BB_INSNS (bb
, insn
)
2969 /* Don't count debug instructions to avoid them affecting
2970 decision choices. */
2971 if (NONDEBUG_INSN_P (insn
))
2972 to_bb_head
[INSN_UID (insn
)] = to_head
++;
2975 bb_size
[bb
->index
] = to_head
;
2978 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR
->succs
) == 1
2979 && (EDGE_SUCC (ENTRY_BLOCK_PTR
, 0)->dest
2980 == ENTRY_BLOCK_PTR
->next_bb
));
2982 dom_tree_walk
= get_all_dominated_blocks (CDI_DOMINATORS
,
2983 ENTRY_BLOCK_PTR
->next_bb
);
2985 /* Walk over each basic block looking for potentially hoistable
2986 expressions, nothing gets hoisted from the entry block. */
2987 FOR_EACH_VEC_ELT (basic_block
, dom_tree_walk
, dom_tree_walk_index
, bb
)
2989 domby
= get_dominated_to_depth (CDI_DOMINATORS
, bb
, MAX_HOIST_DEPTH
);
2991 if (VEC_length (basic_block
, domby
) == 0)
2994 /* Examine each expression that is very busy at the exit of this
2995 block. These are the potentially hoistable expressions. */
2996 for (i
= 0; i
< hoist_vbeout
[bb
->index
]->n_bits
; i
++)
2998 if (TEST_BIT (hoist_vbeout
[bb
->index
], i
))
3000 /* Current expression. */
3001 struct expr
*expr
= index_map
[i
];
3002 /* Number of occurences of EXPR that can be hoisted to BB. */
3004 /* Basic blocks that have occurences reachable from BB. */
3005 bitmap_head _from_bbs
, *from_bbs
= &_from_bbs
;
3006 /* Occurences reachable from BB. */
3007 VEC (occr_t
, heap
) *occrs_to_hoist
= NULL
;
3008 /* We want to insert the expression into BB only once, so
3009 note when we've inserted it. */
3010 int insn_inserted_p
;
3013 bitmap_initialize (from_bbs
, 0);
3015 /* If an expression is computed in BB and is available at end of
3016 BB, hoist all occurences dominated by BB to BB. */
3017 if (TEST_BIT (comp
[bb
->index
], i
))
3019 occr
= find_occr_in_bb (expr
->antic_occr
, bb
);
3023 /* An occurence might've been already deleted
3024 while processing a dominator of BB. */
3025 if (!occr
->deleted_p
)
3027 gcc_assert (NONDEBUG_INSN_P (occr
->insn
));
3035 /* We've found a potentially hoistable expression, now
3036 we look at every block BB dominates to see if it
3037 computes the expression. */
3038 FOR_EACH_VEC_ELT (basic_block
, domby
, j
, dominated
)
3042 /* Ignore self dominance. */
3043 if (bb
== dominated
)
3045 /* We've found a dominated block, now see if it computes
3046 the busy expression and whether or not moving that
3047 expression to the "beginning" of that block is safe. */
3048 if (!TEST_BIT (antloc
[dominated
->index
], i
))
3051 occr
= find_occr_in_bb (expr
->antic_occr
, dominated
);
3054 /* An occurence might've been already deleted
3055 while processing a dominator of BB. */
3056 if (occr
->deleted_p
)
3058 gcc_assert (NONDEBUG_INSN_P (occr
->insn
));
3060 max_distance
= expr
->max_distance
;
3061 if (max_distance
> 0)
3062 /* Adjust MAX_DISTANCE to account for the fact that
3063 OCCR won't have to travel all of DOMINATED, but
3065 max_distance
+= (bb_size
[dominated
->index
]
3066 - to_bb_head
[INSN_UID (occr
->insn
)]);
3068 /* Note if the expression would reach the dominated block
3069 unimpared if it was placed at the end of BB.
3071 Keep track of how many times this expression is hoistable
3072 from a dominated block into BB. */
3073 if (hoist_expr_reaches_here_p (bb
, i
, dominated
, NULL
,
3074 max_distance
, bb_size
))
3077 VEC_safe_push (occr_t
, heap
,
3078 occrs_to_hoist
, occr
);
3079 bitmap_set_bit (from_bbs
, dominated
->index
);
3083 /* If we found more than one hoistable occurrence of this
3084 expression, then note it in the vector of expressions to
3085 hoist. It makes no sense to hoist things which are computed
3086 in only one BB, and doing so tends to pessimize register
3087 allocation. One could increase this value to try harder
3088 to avoid any possible code expansion due to register
3089 allocation issues; however experiments have shown that
3090 the vast majority of hoistable expressions are only movable
3091 from two successors, so raising this threshold is likely
3092 to nullify any benefit we get from code hoisting. */
3093 if (hoistable
> 1 && dbg_cnt (hoist_insn
))
3095 /* If (hoistable != VEC_length), then there is
3096 an occurence of EXPR in BB itself. Don't waste
3097 time looking for LCA in this case. */
3098 if ((unsigned) hoistable
3099 == VEC_length (occr_t
, occrs_to_hoist
))
3103 lca
= nearest_common_dominator_for_set (CDI_DOMINATORS
,
3106 /* Punt, it's better to hoist these occurences to
3108 VEC_free (occr_t
, heap
, occrs_to_hoist
);
3112 /* Punt, no point hoisting a single occurence. */
3113 VEC_free (occr_t
, heap
, occrs_to_hoist
);
3115 insn_inserted_p
= 0;
3117 /* Walk through occurences of I'th expressions we want
3118 to hoist to BB and make the transformations. */
3119 FOR_EACH_VEC_ELT (occr_t
, occrs_to_hoist
, j
, occr
)
3124 gcc_assert (!occr
->deleted_p
);
3127 set
= single_set (insn
);
3130 /* Create a pseudo-reg to store the result of reaching
3131 expressions into. Get the mode for the new pseudo
3132 from the mode of the original destination pseudo.
3134 It is important to use new pseudos whenever we
3135 emit a set. This will allow reload to use
3136 rematerialization for such registers. */
3137 if (!insn_inserted_p
)
3139 = gen_reg_rtx_and_attrs (SET_DEST (set
));
3141 gcse_emit_move_after (SET_DEST (set
), expr
->reaching_reg
,
3144 occr
->deleted_p
= 1;
3148 if (!insn_inserted_p
)
3150 insert_insn_end_basic_block (expr
, bb
);
3151 insn_inserted_p
= 1;
3155 VEC_free (occr_t
, heap
, occrs_to_hoist
);
3156 bitmap_clear (from_bbs
);
3159 VEC_free (basic_block
, heap
, domby
);
3162 VEC_free (basic_block
, heap
, dom_tree_walk
);
3170 /* Top level routine to perform one code hoisting (aka unification) pass
3172 Return nonzero if a change was made. */
3175 one_code_hoisting_pass (void)
3179 gcse_subst_count
= 0;
3180 gcse_create_count
= 0;
3182 /* Return if there's nothing to do, or it is too expensive. */
3183 if (n_basic_blocks
<= NUM_FIXED_BLOCKS
+ 1
3184 || is_too_expensive (_("GCSE disabled")))
3187 doing_code_hoisting_p
= true;
3189 /* We need alias. */
3190 init_alias_analysis ();
3193 gcc_obstack_init (&gcse_obstack
);
3196 alloc_hash_table (&expr_hash_table
);
3197 compute_hash_table (&expr_hash_table
);
3199 dump_hash_table (dump_file
, "Code Hosting Expressions", &expr_hash_table
);
3201 if (expr_hash_table
.n_elems
> 0)
3203 alloc_code_hoist_mem (last_basic_block
, expr_hash_table
.n_elems
);
3204 compute_code_hoist_data ();
3205 changed
= hoist_code ();
3206 free_code_hoist_mem ();
3209 free_hash_table (&expr_hash_table
);
3211 obstack_free (&gcse_obstack
, NULL
);
3213 /* We are finished with alias. */
3214 end_alias_analysis ();
3218 fprintf (dump_file
, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3219 current_function_name (), n_basic_blocks
, bytes_used
);
3220 fprintf (dump_file
, "%d substs, %d insns created\n",
3221 gcse_subst_count
, gcse_create_count
);
3224 doing_code_hoisting_p
= false;
3229 /* Here we provide the things required to do store motion towards the exit.
3230 In order for this to be effective, gcse also needed to be taught how to
3231 move a load when it is killed only by a store to itself.
3236 void foo(float scale)
3238 for (i=0; i<10; i++)
3242 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3243 the load out since its live around the loop, and stored at the bottom
3246 The 'Load Motion' referred to and implemented in this file is
3247 an enhancement to gcse which when using edge based LCM, recognizes
3248 this situation and allows gcse to move the load out of the loop.
3250 Once gcse has hoisted the load, store motion can then push this
3251 load towards the exit, and we end up with no loads or stores of 'i'
3255 pre_ldst_expr_hash (const void *p
)
3257 int do_not_record_p
= 0;
3258 const struct ls_expr
*const x
= (const struct ls_expr
*) p
;
3260 hash_rtx (x
->pattern
, GET_MODE (x
->pattern
), &do_not_record_p
, NULL
, false);
3264 pre_ldst_expr_eq (const void *p1
, const void *p2
)
3266 const struct ls_expr
*const ptr1
= (const struct ls_expr
*) p1
,
3267 *const ptr2
= (const struct ls_expr
*) p2
;
3268 return expr_equiv_p (ptr1
->pattern
, ptr2
->pattern
);
3271 /* This will search the ldst list for a matching expression. If it
3272 doesn't find one, we create one and initialize it. */
3274 static struct ls_expr
*
3277 int do_not_record_p
= 0;
3278 struct ls_expr
* ptr
;
3283 hash
= hash_rtx (x
, GET_MODE (x
), &do_not_record_p
,
3284 NULL
, /*have_reg_qty=*/false);
3287 slot
= htab_find_slot_with_hash (pre_ldst_table
, &e
, hash
, INSERT
);
3289 return (struct ls_expr
*)*slot
;
3291 ptr
= XNEW (struct ls_expr
);
3293 ptr
->next
= pre_ldst_mems
;
3296 ptr
->pattern_regs
= NULL_RTX
;
3297 ptr
->loads
= NULL_RTX
;
3298 ptr
->stores
= NULL_RTX
;
3299 ptr
->reaching_reg
= NULL_RTX
;
3302 ptr
->hash_index
= hash
;
3303 pre_ldst_mems
= ptr
;
3309 /* Free up an individual ldst entry. */
3312 free_ldst_entry (struct ls_expr
* ptr
)
3314 free_INSN_LIST_list (& ptr
->loads
);
3315 free_INSN_LIST_list (& ptr
->stores
);
3320 /* Free up all memory associated with the ldst list. */
3323 free_ld_motion_mems (void)
3326 htab_delete (pre_ldst_table
);
3327 pre_ldst_table
= NULL
;
3329 while (pre_ldst_mems
)
3331 struct ls_expr
* tmp
= pre_ldst_mems
;
3333 pre_ldst_mems
= pre_ldst_mems
->next
;
3335 free_ldst_entry (tmp
);
3338 pre_ldst_mems
= NULL
;
3341 /* Dump debugging info about the ldst list. */
3344 print_ldst_list (FILE * file
)
3346 struct ls_expr
* ptr
;
3348 fprintf (file
, "LDST list: \n");
3350 for (ptr
= pre_ldst_mems
; ptr
!= NULL
; ptr
= ptr
->next
)
3352 fprintf (file
, " Pattern (%3d): ", ptr
->index
);
3354 print_rtl (file
, ptr
->pattern
);
3356 fprintf (file
, "\n Loads : ");
3359 print_rtl (file
, ptr
->loads
);
3361 fprintf (file
, "(nil)");
3363 fprintf (file
, "\n Stores : ");
3366 print_rtl (file
, ptr
->stores
);
3368 fprintf (file
, "(nil)");
3370 fprintf (file
, "\n\n");
3373 fprintf (file
, "\n");
3376 /* Returns 1 if X is in the list of ldst only expressions. */
3378 static struct ls_expr
*
3379 find_rtx_in_ldst (rtx x
)
3383 if (!pre_ldst_table
)
3386 slot
= htab_find_slot (pre_ldst_table
, &e
, NO_INSERT
);
3387 if (!slot
|| ((struct ls_expr
*)*slot
)->invalid
)
3389 return (struct ls_expr
*) *slot
;
3392 /* Load Motion for loads which only kill themselves. */
3394 /* Return true if x, a MEM, is a simple access with no side effects.
3395 These are the types of loads we consider for the ld_motion list,
3396 otherwise we let the usual aliasing take care of it. */
3399 simple_mem (const_rtx x
)
3401 if (MEM_VOLATILE_P (x
))
3404 if (GET_MODE (x
) == BLKmode
)
3407 /* If we are handling exceptions, we must be careful with memory references
3408 that may trap. If we are not, the behavior is undefined, so we may just
3410 if (cfun
->can_throw_non_call_exceptions
&& may_trap_p (x
))
3413 if (side_effects_p (x
))
3416 /* Do not consider function arguments passed on stack. */
3417 if (reg_mentioned_p (stack_pointer_rtx
, x
))
3420 if (flag_float_store
&& FLOAT_MODE_P (GET_MODE (x
)))
3426 /* Make sure there isn't a buried reference in this pattern anywhere.
3427 If there is, invalidate the entry for it since we're not capable
3428 of fixing it up just yet.. We have to be sure we know about ALL
3429 loads since the aliasing code will allow all entries in the
3430 ld_motion list to not-alias itself. If we miss a load, we will get
3431 the wrong value since gcse might common it and we won't know to
3435 invalidate_any_buried_refs (rtx x
)
3439 struct ls_expr
* ptr
;
3441 /* Invalidate it in the list. */
3442 if (MEM_P (x
) && simple_mem (x
))
3444 ptr
= ldst_entry (x
);
3448 /* Recursively process the insn. */
3449 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
3451 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
3454 invalidate_any_buried_refs (XEXP (x
, i
));
3455 else if (fmt
[i
] == 'E')
3456 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3457 invalidate_any_buried_refs (XVECEXP (x
, i
, j
));
3461 /* The maximum number of load/store motions that can be performed for one
3463 static unsigned maximum_lsm_limit
;
3465 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3466 being defined as MEM loads and stores to symbols, with no side effects
3467 and no registers in the expression. For a MEM destination, we also
3468 check that the insn is still valid if we replace the destination with a
3469 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3470 which don't match this criteria, they are invalidated and trimmed out
3474 compute_ld_motion_mems (void)
3476 struct ls_expr
* ptr
;
3480 init_loop_lsm_limit ();
3482 pre_ldst_mems
= NULL
;
3484 = htab_create (13, pre_ldst_expr_hash
, pre_ldst_expr_eq
, NULL
);
3488 struct loop
*loop
= bb
->loop_father
;
3489 unsigned ld_motion_limit
= find_loop_lsm_limit (loop
);
3490 unsigned ld_motion_count
= 0;
3492 FOR_BB_INSNS (bb
, insn
)
3494 if (NONDEBUG_INSN_P (insn
))
3496 if (GET_CODE (PATTERN (insn
)) == SET
)
3498 rtx src
= SET_SRC (PATTERN (insn
));
3499 rtx dest
= SET_DEST (PATTERN (insn
));
3501 /* Check for a simple LOAD... */
3502 if (MEM_P (src
) && simple_mem (src
))
3504 ptr
= ldst_entry (src
);
3506 ptr
->loads
= alloc_INSN_LIST (insn
, ptr
->loads
);
3512 /* Make sure there isn't a buried load somewhere. */
3513 invalidate_any_buried_refs (src
);
3516 /* Check for stores. Don't worry about aliased ones, they
3517 will block any movement we might do later. We only care
3518 about this exact pattern since those are the only
3519 circumstance that we will ignore the aliasing info. */
3520 if (MEM_P (dest
) && simple_mem (dest
))
3522 ptr
= ldst_entry (dest
);
3525 && GET_CODE (src
) != ASM_OPERANDS
3526 /* Check for REG manually since want_to_gcse_p
3527 returns 0 for all REGs. */
3528 && can_assign_to_reg_without_clobbers_p (src
)
3529 && ld_motion_count
< ld_motion_limit
)
3531 ptr
->stores
= alloc_INSN_LIST (insn
, ptr
->stores
);
3537 if (dump_file
&& ld_motion_count
>= ld_motion_limit
)
3539 "suppress ld_motion in loop=%d bb=%d due"
3540 " to reg pressure.\n",
3541 loop
->num
, bb
->index
);
3547 invalidate_any_buried_refs (PATTERN (insn
));
3550 adjust_loop_lsm_limit (loop
, ld_motion_count
);
3553 fini_loop_lsm_limit ();
3560 } loop_lsm_limit_map
;
3562 /* hash_table that maps loop to lsm_limit. */
3563 static htab_t loop_lsm_limit_map_htab
= NULL
;
3565 /* hash function for loop_lsm_limit_map_htab. */
3568 loop_lsm_limit_map_hash (const void *p
)
3570 const loop_lsm_limit_map
*const x
= (const loop_lsm_limit_map
*) p
;
3571 return htab_hash_pointer (x
->loop
);
3574 /* hash equal function for loop_lsm_limit_map_htab. */
3577 loop_lsm_limit_map_eq (const void *p1
, const void *p2
)
3579 const loop_lsm_limit_map
*const ptr1
= (const loop_lsm_limit_map
*) p1
;
3580 const loop_lsm_limit_map
*const ptr2
= (const loop_lsm_limit_map
*) p2
;
3582 return htab_eq_pointer (ptr1
->loop
, ptr2
->loop
);
3585 /* free one entry in loop_lsm_limit_map_htab. */
3588 free_loop_lsm_limit_map (void **slot
, void *data ATTRIBUTE_UNUSED
)
3590 free(*(loop_lsm_limit_map
**) slot
);
3594 /* use the loops strcture to find enclosing loop for each basic_block */
3595 static struct loops loops_lsm
;
3596 static bool dominator_info_avail_before_lsm_limit
= false;
3598 /* Initalization function for suppressing execessive load/store motions.
3599 Build the loop structure so that we can count the number of motions
3600 performed. Set the maximum number of motions for a loop. */
3603 init_loop_lsm_limit (void)
3605 dominator_info_avail_before_lsm_limit
= dom_info_available_p(CDI_DOMINATORS
);
3606 flow_loops_find (&loops_lsm
);
3608 loop_lsm_limit_map_htab
= htab_create (59, loop_lsm_limit_map_hash
,
3609 loop_lsm_limit_map_eq
, NULL
);
3611 /* avail_regs minus the induction variables */
3612 maximum_lsm_limit
= target_avail_regs
- 1;
3615 /* Cleanup function: destroy loop structure and free space. */
3618 fini_loop_lsm_limit (void)
3622 flow_loops_free (&loops_lsm
);
3624 if (!dominator_info_avail_before_lsm_limit
)
3625 free_dominance_info (CDI_DOMINATORS
);
3628 bb
->loop_father
= NULL
;
3630 if (loop_lsm_limit_map_htab
)
3632 htab_traverse (loop_lsm_limit_map_htab
, free_loop_lsm_limit_map
, NULL
);
3633 htab_delete (loop_lsm_limit_map_htab
);
3637 /* This routine tries to find the number of registers that
3638 (1) live across the iteration, and
3639 (2) referenced in the loop.
3640 those store-motions performed in tree-lim falls into this category. */
3643 estimate_reg_pressure_before_lsm (struct loop
*loop
)
3648 regset live_across_iteration
;
3649 regset used_in_loop
;
3650 basic_block header
, latch
;
3655 gcc_assert (loop
!= loops_lsm
.tree_root
);
3657 latch
= loop
->latch
;
3658 /* don't handle multiple latches */
3662 header
= loop
->header
;
3663 gcc_assert (header
);
3665 live_across_iteration
= ALLOC_REG_SET (®_obstack
);
3666 COPY_REG_SET (live_across_iteration
, DF_LR_IN (header
));
3667 AND_REG_SET (live_across_iteration
, DF_LR_OUT (latch
));
3669 used_in_loop
= ALLOC_REG_SET (®_obstack
);
3670 CLEAR_REG_SET (used_in_loop
);
3672 body
= get_loop_body (loop
);
3673 /* this loop computes a subset of must-use */
3674 for (i
=0; i
<loop
->num_nodes
; i
++)
3676 basic_block bb
= *(body
+ i
);
3678 if (dominated_by_p (CDI_DOMINATORS
, latch
, bb
))
3679 IOR_REG_SET (used_in_loop
, &(DF_LR_BB_INFO (bb
)->use
));
3683 AND_REG_SET (live_across_iteration
, used_in_loop
);
3685 count
= bitmap_count_bits (live_across_iteration
);
3687 FREE_REG_SET (live_across_iteration
);
3688 FREE_REG_SET (used_in_loop
);
3693 /* Find the number of load/store motion we can perform without
3694 create excessive register pressure to loop LOOP */
3697 find_loop_lsm_limit (struct loop
*loop
)
3700 loop_lsm_limit_map t
, *ret
;
3701 unsigned reg_pressure
;
3702 unsigned limit
= maximum_lsm_limit
;
3704 if (!loop
|| loop
== loops_lsm
.tree_root
)
3705 return (unsigned)-1;
3708 slot
= htab_find_slot (loop_lsm_limit_map_htab
, &t
, NO_INSERT
);
3710 return (*(loop_lsm_limit_map
**)slot
)->lsm_limit
;
3712 /* insert this loop and initialize it */
3713 reg_pressure
= estimate_reg_pressure_before_lsm (loop
);
3714 ret
= (loop_lsm_limit_map
*) gmalloc (sizeof (loop_lsm_limit_map
));
3717 limit
= (limit
> reg_pressure
? limit
- reg_pressure
: 0);
3719 ret
->lsm_limit
= limit
;
3720 slot
= htab_find_slot (loop_lsm_limit_map_htab
, ret
, INSERT
);
3722 (*slot
) = (void**)ret
;
3727 /* Adjust the lsm limit based on the instances that have been
3731 adjust_loop_lsm_limit (struct loop
*loop
, unsigned performed
)
3734 loop_lsm_limit_map t
;
3737 if (performed
== 0 || loop
== NULL
|| loop
== loops_lsm
.tree_root
)
3741 slot
= htab_find_slot (loop_lsm_limit_map_htab
, &t
, NO_INSERT
);
3743 limit
= (*(loop_lsm_limit_map
**)slot
)->lsm_limit
;
3744 gcc_assert (limit
>= performed
);
3745 (*(loop_lsm_limit_map
**)slot
)->lsm_limit
= limit
- performed
;
3748 /* Remove any references that have been either invalidated or are not in the
3749 expression list for pre gcse. */
3752 trim_ld_motion_mems (void)
3754 struct ls_expr
* * last
= & pre_ldst_mems
;
3755 struct ls_expr
* ptr
= pre_ldst_mems
;
3761 /* Delete if entry has been made invalid. */
3764 /* Delete if we cannot find this mem in the expression list. */
3765 unsigned int hash
= ptr
->hash_index
% expr_hash_table
.size
;
3767 for (expr
= expr_hash_table
.table
[hash
];
3769 expr
= expr
->next_same_hash
)
3770 if (expr_equiv_p (expr
->expr
, ptr
->pattern
))
3774 expr
= (struct expr
*) 0;
3778 /* Set the expression field if we are keeping it. */
3786 htab_remove_elt_with_hash (pre_ldst_table
, ptr
, ptr
->hash_index
);
3787 free_ldst_entry (ptr
);
3792 /* Show the world what we've found. */
3793 if (dump_file
&& pre_ldst_mems
!= NULL
)
3794 print_ldst_list (dump_file
);
3797 /* This routine will take an expression which we are replacing with
3798 a reaching register, and update any stores that are needed if
3799 that expression is in the ld_motion list. Stores are updated by
3800 copying their SRC to the reaching register, and then storing
3801 the reaching register into the store location. These keeps the
3802 correct value in the reaching register for the loads. */
3805 update_ld_motion_stores (struct expr
* expr
)
3807 struct ls_expr
* mem_ptr
;
3809 if ((mem_ptr
= find_rtx_in_ldst (expr
->expr
)))
3811 /* We can try to find just the REACHED stores, but is shouldn't
3812 matter to set the reaching reg everywhere... some might be
3813 dead and should be eliminated later. */
3815 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3816 where reg is the reaching reg used in the load. We checked in
3817 compute_ld_motion_mems that we can replace (set mem expr) with
3818 (set reg expr) in that insn. */
3819 rtx list
= mem_ptr
->stores
;
3821 for ( ; list
!= NULL_RTX
; list
= XEXP (list
, 1))
3823 rtx insn
= XEXP (list
, 0);
3824 rtx pat
= PATTERN (insn
);
3825 rtx src
= SET_SRC (pat
);
3826 rtx reg
= expr
->reaching_reg
;
3829 /* If we've already copied it, continue. */
3830 if (expr
->reaching_reg
== src
)
3835 fprintf (dump_file
, "PRE: store updated with reaching reg ");
3836 print_rtl (dump_file
, reg
);
3837 fprintf (dump_file
, ":\n ");
3838 print_inline_rtx (dump_file
, insn
, 8);
3839 fprintf (dump_file
, "\n");
3842 copy
= gen_move_insn (reg
, copy_rtx (SET_SRC (pat
)));
3843 emit_insn_before (copy
, insn
);
3844 SET_SRC (pat
) = reg
;
3845 df_insn_rescan (insn
);
3847 /* un-recognize this pattern since it's probably different now. */
3848 INSN_CODE (insn
) = -1;
3849 gcse_create_count
++;
3854 /* Return true if the graph is too expensive to optimize. PASS is the
3855 optimization about to be performed. */
3858 is_too_expensive (const char *pass
)
3860 /* Trying to perform global optimizations on flow graphs which have
3861 a high connectivity will take a long time and is unlikely to be
3862 particularly useful.
3864 In normal circumstances a cfg should have about twice as many
3865 edges as blocks. But we do not want to punish small functions
3866 which have a couple switch statements. Rather than simply
3867 threshold the number of blocks, uses something with a more
3868 graceful degradation. */
3869 if (n_edges
> 20000 + n_basic_blocks
* 4)
3871 warning (OPT_Wdisabled_optimization
,
3872 "%s: %d basic blocks and %d edges/basic block",
3873 pass
, n_basic_blocks
, n_edges
/ n_basic_blocks
);
3878 /* If allocating memory for the dataflow bitmaps would take up too much
3879 storage it's better just to disable the optimization. */
3881 * SBITMAP_SET_SIZE (max_reg_num ())
3882 * sizeof (SBITMAP_ELT_TYPE
)) > MAX_GCSE_MEMORY
)
3884 warning (OPT_Wdisabled_optimization
,
3885 "%s: %d basic blocks and %d registers",
3886 pass
, n_basic_blocks
, max_reg_num ());
3894 /* All the passes implemented in this file. Each pass has its
3895 own gate and execute function, and at the end of the file a
3896 pass definition for passes.c.
3898 We do not construct an accurate cfg in functions which call
3899 setjmp, so none of these passes runs if the function calls
3901 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3906 return optimize
> 0 && flag_gcse
3907 && !cfun
->calls_setjmp
3908 && optimize_function_for_speed_p (cfun
)
3913 execute_rtl_pre (void)
3916 delete_unreachable_blocks ();
3918 changed
= one_pre_gcse_pass ();
3919 flag_rerun_cse_after_global_opts
|= changed
;
3926 gate_rtl_hoist (void)
3928 return optimize
> 0 && flag_gcse
3929 && !cfun
->calls_setjmp
3930 /* It does not make sense to run code hoisting unless we are optimizing
3931 for code size -- it rarely makes programs faster, and can make then
3932 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3933 && optimize_function_for_size_p (cfun
)
3938 execute_rtl_hoist (void)
3941 delete_unreachable_blocks ();
3943 changed
= one_code_hoisting_pass ();
3944 flag_rerun_cse_after_global_opts
|= changed
;
3950 struct rtl_opt_pass pass_rtl_pre
=
3954 "rtl pre", /* name */
3955 gate_rtl_pre
, /* gate */
3956 execute_rtl_pre
, /* execute */
3959 0, /* static_pass_number */
3961 PROP_cfglayout
, /* properties_required */
3962 0, /* properties_provided */
3963 0, /* properties_destroyed */
3964 0, /* todo_flags_start */
3965 TODO_df_finish
| TODO_verify_rtl_sharing
|
3966 TODO_verify_flow
| TODO_ggc_collect
/* todo_flags_finish */
3970 struct rtl_opt_pass pass_rtl_hoist
=
3975 gate_rtl_hoist
, /* gate */
3976 execute_rtl_hoist
, /* execute */
3979 0, /* static_pass_number */
3980 TV_HOIST
, /* tv_id */
3981 PROP_cfglayout
, /* properties_required */
3982 0, /* properties_provided */
3983 0, /* properties_destroyed */
3984 0, /* todo_flags_start */
3985 TODO_df_finish
| TODO_verify_rtl_sharing
|
3986 TODO_verify_flow
| TODO_ggc_collect
/* todo_flags_finish */
3990 #include "gt-gcse.h"