1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
4 2006, 2007, 2008 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* TODO
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - a store to the same address as a load does not kill the load if the
28 source of the store is also the destination of the load. Handling this
29 allows more load motion, particularly out of loops.
30 - ability to realloc sbitmap vectors would allow one initial computation
31 of reg_set_in_block with only subsequent additions, rather than
32 recomputing it for each pass
36 /* References searched while implementing this.
38 Compilers Principles, Techniques and Tools
39 Aho, Sethi, Ullman
40 Addison-Wesley, 1988
42 Global Optimization by Suppression of Partial Redundancies
43 E. Morel, C. Renvoise
44      Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
46 A Portable Machine-Independent Global Optimizer - Design and Measurements
47 Frederick Chow
48 Stanford Ph.D. thesis, Dec. 1983
50 A Fast Algorithm for Code Movement Optimization
51 D.M. Dhamdhere
52 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
54 A Solution to a Problem with Morel and Renvoise's
55 Global Optimization by Suppression of Partial Redundancies
56 K-H Drechsler, M.P. Stadel
57 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
59 Practical Adaptation of the Global Optimization
60 Algorithm of Morel and Renvoise
61 D.M. Dhamdhere
62 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
64 Efficiently Computing Static Single Assignment Form and the Control
65 Dependence Graph
66 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
67 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
69 Lazy Code Motion
70 J. Knoop, O. Ruthing, B. Steffen
71 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
73 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
74 Time for Reducible Flow Control
75 Thomas Ball
76 ACM Letters on Programming Languages and Systems,
77 Vol. 2, Num. 1-4, Mar-Dec 1993
79 An Efficient Representation for Sparse Sets
80 Preston Briggs, Linda Torczon
81 ACM Letters on Programming Languages and Systems,
82 Vol. 2, Num. 1-4, Mar-Dec 1993
84 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
85 K-H Drechsler, M.P. Stadel
86 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
88 Partial Dead Code Elimination
89 J. Knoop, O. Ruthing, B. Steffen
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Effective Partial Redundancy Elimination
93 P. Briggs, K.D. Cooper
94 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
96 The Program Structure Tree: Computing Control Regions in Linear Time
97 R. Johnson, D. Pearson, K. Pingali
98 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
100 Optimal Code Motion: Theory and Practice
101 J. Knoop, O. Ruthing, B. Steffen
102 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
104 The power of assignment motion
105 J. Knoop, O. Ruthing, B. Steffen
106 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
108 Global code motion / global value numbering
109 C. Click
110 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
112 Value Driven Redundancy Elimination
113 L.T. Simpson
114 Rice University Ph.D. thesis, Apr. 1996
116 Value Numbering
117 L.T. Simpson
118 Massively Scalar Compiler Project, Rice University, Sep. 1996
120 High Performance Compilers for Parallel Computing
121 Michael Wolfe
122 Addison-Wesley, 1996
124 Advanced Compiler Design and Implementation
125 Steven Muchnick
126 Morgan Kaufmann, 1997
128 Building an Optimizing Compiler
129 Robert Morgan
130 Digital Press, 1998
132 People wishing to speed up the code here should read:
133 Elimination Algorithms for Data Flow Analysis
134 B.G. Ryder, M.C. Paull
135 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
137 How to Analyze Large Programs Efficiently and Informatively
138 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
139 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
141 People wishing to do something different can find various possibilities
142 in the above papers and elsewhere.
145 #include "config.h"
146 #include "system.h"
147 #include "coretypes.h"
148 #include "tm.h"
149 #include "toplev.h"
151 #include "rtl.h"
152 #include "tree.h"
153 #include "tm_p.h"
154 #include "regs.h"
155 #include "hard-reg-set.h"
156 #include "flags.h"
157 #include "real.h"
158 #include "insn-config.h"
159 #include "recog.h"
160 #include "basic-block.h"
161 #include "output.h"
162 #include "function.h"
163 #include "expr.h"
164 #include "except.h"
165 #include "ggc.h"
166 #include "params.h"
167 #include "cselib.h"
168 #include "intl.h"
169 #include "obstack.h"
170 #include "timevar.h"
171 #include "tree-pass.h"
172 #include "hashtab.h"
173 #include "df.h"
174 #include "dbgcnt.h"
176 /* Propagate flow information through back edges and thus enable PRE's
177 moving loop invariant calculations out of loops.
179 Originally this tended to create worse overall code, but several
180 improvements during the development of PRE seem to have made following
181 back edges generally a win.
183 Note much of the loop invariant code motion done here would normally
184 be done by loop.c, which has more heuristics for when to move invariants
185 out of loops. At some point we might need to move some of those
186 heuristics into gcse.c. */
188 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
189 are a superset of those done by GCSE.
191 We perform the following steps:
193 1) Compute basic block information.
195 2) Compute table of places where registers are set.
197 3) Perform copy/constant propagation.
199 4) Perform global cse using lazy code motion if not optimizing
200 for size, or code hoisting if we are.
202 5) Perform another pass of copy/constant propagation.
204 Two passes of copy/constant propagation are done because the first one
205 enables more GCSE and the second one helps to clean up the copies that
206 GCSE creates. This is needed more for PRE than for Classic because Classic
207 GCSE will try to use an existing register containing the common
208 subexpression rather than create a new one. This is harder to do for PRE
209 because of the code motion (which Classic GCSE doesn't do).
211 Expressions we are interested in GCSE-ing are of the form
212 (set (pseudo-reg) (expression)).
213 Function want_to_gcse_p says what these are.
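   For instance (an illustrative example, not taken from any particular
   test case), an insn whose pattern is

     (set (reg:SI 105) (plus:SI (reg:SI 100) (reg:SI 101)))

   is a candidate, while sets of hard registers, stores to memory, and
   sets whose source is a lone register or constant are not.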
215 PRE handles moving invariant expressions out of loops (by treating them as
216 partially redundant).
218 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
219 assignment) based GVN (global value numbering). L. T. Simpson's paper
220 (Rice University) on value numbering is a useful reference for this.
222 **********************
224 We used to support multiple passes but there are diminishing returns in
225 doing so. The first pass usually makes 90% of the changes that are doable.
226 A second pass can make a few more changes made possible by the first pass.
227 Experiments show any further passes don't make enough changes to justify
228 the expense.
230 A study of spec92 using an unlimited number of passes:
231 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
232 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
233 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
235    It was found that doing copy propagation between each pass enables further
236 substitutions.
238 PRE is quite expensive in complicated functions because the DFA can take
239 a while to converge. Hence we only perform one pass. The parameter
240 max-gcse-passes can be modified if one wants to experiment.
242 **********************
244 The steps for PRE are:
246 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
248 2) Perform the data flow analysis for PRE.
250 3) Delete the redundant instructions
252 4) Insert the required copies [if any] that make the partially
253 redundant instructions fully redundant.
255 5) For other reaching expressions, insert an instruction to copy the value
256 to a newly created pseudo that will reach the redundant instruction.
258 The deletion is done first so that when we do insertions we
259 know which pseudo reg to use.
261 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
262 argue it is not. The number of iterations for the algorithm to converge
263 is typically 2-4 so I don't view it as that expensive (relatively speaking).
265 PRE GCSE depends heavily on the second CSE pass to clean up the copies
266 we create. To make an expression reach the place where it's redundant,
267 the result of the expression is copied to a new register, and the redundant
268 expression is deleted by replacing it with this new register. Classic GCSE
269 doesn't have this problem as much as it computes the reaching defs of
270 each register in each block and thus can try to use an existing
271 register. */
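/* A purely illustrative, source-level sketch of the transformation
   described above (hypothetical function and variable names; the pass
   itself works on RTL SETs of pseudo registers, not on C statements).
   In BEFORE, "a + b" is partially redundant: it is available when P is
   true but not when P is false.  PRE inserts the missing computation on
   the else path and turns the now fully redundant computation into a
   copy from the new pseudo T:

     int before (int p, int a, int b)
     {
       int x, y;
       if (p)
         x = a + b;
       else
         x = 0;
       y = a + b;       <- partially redundant
       return x + y;
     }

     int after (int p, int a, int b)
     {
       int x, y, t;
       if (p)
         x = t = a + b;
       else
         {
           t = a + b;   <- inserted copy of the expression
           x = 0;
         }
       y = t;           <- redundant computation replaced by a copy
       return x + y;
     }  */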
273 /* GCSE global vars. */
275 /* Note whether or not we should run jump optimization after gcse. We
276 want to do this for two cases.
278 * If we changed any jumps via cprop.
280 * If we added any labels via edge splitting. */
281 static int run_jump_opt_after_gcse;
283 /* An obstack for our working variables. */
284 static struct obstack gcse_obstack;
286 struct reg_use {rtx reg_rtx; };
288 /* Hash table of expressions. */
290 struct expr
292 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
293 rtx expr;
294 /* Index in the available expression bitmaps. */
295 int bitmap_index;
296 /* Next entry with the same hash. */
297 struct expr *next_same_hash;
298 /* List of anticipatable occurrences in basic blocks in the function.
299 An "anticipatable occurrence" is one that is the first occurrence in the
300 basic block, the operands are not modified in the basic block prior
301 to the occurrence and the output is not used between the start of
302 the block and the occurrence. */
303 struct occr *antic_occr;
304   /* List of available occurrences in basic blocks in the function.
305 An "available occurrence" is one that is the last occurrence in the
306 basic block and the operands are not modified by following statements in
307 the basic block [including this insn]. */
308 struct occr *avail_occr;
309 /* Non-null if the computation is PRE redundant.
310 The value is the newly created pseudo-reg to record a copy of the
311 expression in all the places that reach the redundant copy. */
312 rtx reaching_reg;
315 /* Occurrence of an expression.
316 There is one per basic block. If a pattern appears more than once the
317 last appearance is used [or first for anticipatable expressions]. */
319 struct occr
321 /* Next occurrence of this expression. */
322 struct occr *next;
323 /* The insn that computes the expression. */
324 rtx insn;
325 /* Nonzero if this [anticipatable] occurrence has been deleted. */
326 char deleted_p;
327 /* Nonzero if this [available] occurrence has been copied to
328 reaching_reg. */
329 /* ??? This is mutually exclusive with deleted_p, so they could share
330 the same byte. */
331 char copied_p;
334 /* Expression and copy propagation hash tables.
335 Each hash table is an array of buckets.
336 ??? It is known that if it were an array of entries, structure elements
337 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
338 not clear whether in the final analysis a sufficient amount of memory would
339 be saved as the size of the available expression bitmaps would be larger
340 [one could build a mapping table without holes afterwards though].
341 Someday I'll perform the computation and figure it out. */
343 struct hash_table
345 /* The table itself.
346 This is an array of `expr_hash_table_size' elements. */
347 struct expr **table;
349 /* Size of the hash table, in elements. */
350 unsigned int size;
352 /* Number of hash table elements. */
353 unsigned int n_elems;
355   /* Whether the table is the expression or the copy propagation one. */
356 int set_p;
359 /* Expression hash table. */
360 static struct hash_table expr_hash_table;
362 /* Copy propagation hash table. */
363 static struct hash_table set_hash_table;
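/* Illustrative lookup sketch (a hypothetical helper, not part of this
   file): an expression X is located in one of these tables by hashing
   it into a bucket and walking the `next_same_hash' chain, with
   expr_equiv_p making the final comparison.  This is the idiom used by
   insert_expr_in_table below:

     static struct expr *
     lookup_expr_sketch (rtx x, enum machine_mode mode,
                         struct hash_table *table)
     {
       int do_not_record_p;
       unsigned int hash;
       struct expr *cur;

       hash = hash_expr (x, mode, &do_not_record_p, table->size);
       if (do_not_record_p)
         return NULL;
       for (cur = table->table[hash]; cur != NULL; cur = cur->next_same_hash)
         if (expr_equiv_p (cur->expr, x))
           return cur;
       return NULL;
     }  */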
365 /* Mapping of uids to cuids.
366 Only real insns get cuids. */
367 static int *uid_cuid;
369 /* Highest UID in UID_CUID. */
370 static int max_uid;
372 /* Get the cuid of an insn. */
373 #ifdef ENABLE_CHECKING
374 #define INSN_CUID(INSN) \
375 (gcc_assert (INSN_UID (INSN) <= max_uid), uid_cuid[INSN_UID (INSN)])
376 #else
377 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
378 #endif
380 /* Number of cuids. */
381 static int max_cuid;
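/* Illustrative sketch (a hypothetical helper, not used by the pass):
   because CUIDs increase monotonically over the real insns of the
   function, the relative order of two insns in the same basic block
   reduces to an integer comparison, which is what oprs_unchanged_p and
   load_killed_in_block_p below rely on:

     static int
     insn_comes_before_p (rtx a, rtx b)
     {
       return INSN_CUID (a) < INSN_CUID (b);
     }  */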
383 /* Maximum register number in function prior to doing gcse + 1.
384 Registers created during this pass have regno >= max_gcse_regno.
385 This is named with "gcse" to not collide with global of same name. */
386 static unsigned int max_gcse_regno;
388 /* Table of registers that are modified.
390 For each register, each element is a list of places where the pseudo-reg
391 is set.
393 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
394 requires knowledge of which blocks kill which regs [and thus could use
395 a bitmap instead of the lists `reg_set_table' uses].
397    `reg_set_table' could be turned into an array of bitmaps (num-bbs x
398 num-regs) [however perhaps it may be useful to keep the data as is]. One
399 advantage of recording things this way is that `reg_set_table' is fairly
400 sparse with respect to pseudo regs but for hard regs could be fairly dense
401 [relatively speaking]. And recording sets of pseudo-regs in lists speeds
402 up functions like compute_transp since in the case of pseudo-regs we only
403 need to iterate over the number of times a pseudo-reg is set, not over the
404 number of basic blocks [clearly there is a bit of a slow down in the cases
405 where a pseudo is set more than once in a block, however it is believed
406 that the net effect is to speed things up]. This isn't done for hard-regs
407 because recording call-clobbered hard-regs in `reg_set_table' at each
408 function call can consume a fair bit of memory, and iterating over
409 hard-regs stored this way in compute_transp will be more expensive. */
411 typedef struct reg_set
413 /* The next setting of this register. */
414 struct reg_set *next;
415 /* The index of the block where it was set. */
416 int bb_index;
417 } reg_set;
419 static reg_set **reg_set_table;
421 /* Size of `reg_set_table'.
422 The table starts out at max_gcse_regno + slop, and is enlarged as
423 necessary. */
424 static int reg_set_table_size;
426 /* Amount to grow `reg_set_table' by when it's full. */
427 #define REG_SET_TABLE_SLOP 100
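/* Illustrative sketch (a hypothetical helper, not used by the pass):
   the per-register lists above let us ask "is pseudo REGNO ever set in
   block BB?" by walking only the sets of that one register instead of
   every insn in every block, which is the access pattern compute_transp
   depends on:

     static int
     reg_set_in_bb_p (unsigned int regno, basic_block bb)
     {
       reg_set *r;

       for (r = reg_set_table[regno]; r != NULL; r = r->next)
         if (r->bb_index == bb->index)
           return 1;
       return 0;
     }  */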
429 /* This is a list of expressions which are MEMs and will be used by load
430 or store motion.
431 Load motion tracks MEMs which aren't killed by
432    anything except themselves (i.e., loads and stores to a single location).
433 We can then allow movement of these MEM refs with a little special
434 allowance. (all stores copy the same value to the reaching reg used
435 for the loads). This means all values used to store into memory must have
436 no side effects so we can re-issue the setter value.
437 Store Motion uses this structure as an expression table to track stores
438 which look interesting, and might be moveable towards the exit block. */
440 struct ls_expr
442 struct expr * expr; /* Gcse expression reference for LM. */
443 rtx pattern; /* Pattern of this mem. */
444 rtx pattern_regs; /* List of registers mentioned by the mem. */
445 rtx loads; /* INSN list of loads seen. */
446 rtx stores; /* INSN list of stores seen. */
447 struct ls_expr * next; /* Next in the list. */
448 int invalid; /* Invalid for some reason. */
449 int index; /* If it maps to a bitmap index. */
450 unsigned int hash_index; /* Index when in a hash table. */
451 rtx reaching_reg; /* Register to use when re-writing. */
454 /* Array of implicit set patterns indexed by basic block index. */
455 static rtx *implicit_sets;
457 /* Head of the list of load/store memory refs. */
458 static struct ls_expr * pre_ldst_mems = NULL;
460 /* Hashtable for the load/store memory refs. */
461 static htab_t pre_ldst_table = NULL;
463 /* Bitmap containing one bit for each register in the program.
464 Used when performing GCSE to track which registers have been set since
465 the start of the basic block. */
466 static regset reg_set_bitmap;
468 /* For each block, a bitmap of registers set in the block.
469 This is used by compute_transp.
470 It is computed during hash table computation and not by compute_sets
471 as it includes registers added since the last pass (or between cprop and
472 gcse) and it's currently not easy to realloc sbitmap vectors. */
473 static sbitmap *reg_set_in_block;
475 /* Array, indexed by basic block number for a list of insns which modify
476 memory within that block. */
477 static rtx * modify_mem_list;
478 static bitmap modify_mem_list_set;
480 /* This array parallels modify_mem_list, but is kept canonicalized. */
481 static rtx * canon_modify_mem_list;
483 /* Bitmap indexed by block numbers to record which blocks contain
484 function calls. */
485 static bitmap blocks_with_calls;
487 /* Various variables for statistics gathering. */
489 /* Memory used in a pass.
490 This isn't intended to be absolutely precise. Its intent is only
491 to keep an eye on memory usage. */
492 static int bytes_used;
494 /* GCSE substitutions made. */
495 static int gcse_subst_count;
496 /* Number of copy instructions created. */
497 static int gcse_create_count;
498 /* Number of local constants propagated. */
499 static int local_const_prop_count;
500 /* Number of local copies propagated. */
501 static int local_copy_prop_count;
502 /* Number of global constants propagated. */
503 static int global_const_prop_count;
504 /* Number of global copies propagated. */
505 static int global_copy_prop_count;
507 /* For available exprs */
508 static sbitmap *ae_kill, *ae_gen;
510 static void compute_can_copy (void);
511 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
512 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
513 static void *grealloc (void *, size_t);
514 static void *gcse_alloc (unsigned long);
515 static void alloc_gcse_mem (void);
516 static void free_gcse_mem (void);
517 static void alloc_reg_set_mem (int);
518 static void free_reg_set_mem (void);
519 static void record_one_set (int, rtx);
520 static void record_set_info (rtx, const_rtx, void *);
521 static void compute_sets (void);
522 static void hash_scan_insn (rtx, struct hash_table *, int);
523 static void hash_scan_set (rtx, rtx, struct hash_table *);
524 static void hash_scan_clobber (rtx, rtx, struct hash_table *);
525 static void hash_scan_call (rtx, rtx, struct hash_table *);
526 static int want_to_gcse_p (rtx);
527 static bool can_assign_to_reg_p (rtx);
528 static bool gcse_constant_p (const_rtx);
529 static int oprs_unchanged_p (const_rtx, const_rtx, int);
530 static int oprs_anticipatable_p (const_rtx, const_rtx);
531 static int oprs_available_p (const_rtx, const_rtx);
532 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int,
533 struct hash_table *);
534 static void insert_set_in_table (rtx, rtx, struct hash_table *);
535 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
536 static unsigned int hash_set (int, int);
537 static int expr_equiv_p (const_rtx, const_rtx);
538 static void record_last_reg_set_info (rtx, int);
539 static void record_last_mem_set_info (rtx);
540 static void record_last_set_info (rtx, const_rtx, void *);
541 static void compute_hash_table (struct hash_table *);
542 static void alloc_hash_table (int, struct hash_table *, int);
543 static void free_hash_table (struct hash_table *);
544 static void compute_hash_table_work (struct hash_table *);
545 static void dump_hash_table (FILE *, const char *, struct hash_table *);
546 static struct expr *lookup_set (unsigned int, struct hash_table *);
547 static struct expr *next_set (unsigned int, struct expr *);
548 static void reset_opr_set_tables (void);
549 static int oprs_not_set_p (const_rtx, const_rtx);
550 static void mark_call (rtx);
551 static void mark_set (rtx, rtx);
552 static void mark_clobber (rtx, rtx);
553 static void mark_oprs_set (rtx);
554 static void alloc_cprop_mem (int, int);
555 static void free_cprop_mem (void);
556 static void compute_transp (const_rtx, int, sbitmap *, int);
557 static void compute_transpout (void);
558 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
559 struct hash_table *);
560 static void compute_cprop_data (void);
561 static void find_used_regs (rtx *, void *);
562 static int try_replace_reg (rtx, rtx, rtx);
563 static struct expr *find_avail_set (int, rtx);
564 static int cprop_jump (basic_block, rtx, rtx, rtx, rtx);
565 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
566 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
567 static void canon_list_insert (rtx, const_rtx, void *);
568 static int cprop_insn (rtx, int);
569 static int cprop (int);
570 static void find_implicit_sets (void);
571 static int one_cprop_pass (int, bool, bool);
572 static bool constprop_register (rtx, rtx, rtx, bool);
573 static struct expr *find_bypass_set (int, int);
574 static bool reg_killed_on_edge (const_rtx, const_edge);
575 static int bypass_block (basic_block, rtx, rtx);
576 static int bypass_conditional_jumps (void);
577 static void alloc_pre_mem (int, int);
578 static void free_pre_mem (void);
579 static void compute_pre_data (void);
580 static int pre_expr_reaches_here_p (basic_block, struct expr *,
581 basic_block);
582 static void insert_insn_end_basic_block (struct expr *, basic_block, int);
583 static void pre_insert_copy_insn (struct expr *, rtx);
584 static void pre_insert_copies (void);
585 static int pre_delete (void);
586 static int pre_gcse (void);
587 static int one_pre_gcse_pass (int);
588 static void add_label_notes (rtx, rtx);
589 static void alloc_code_hoist_mem (int, int);
590 static void free_code_hoist_mem (void);
591 static void compute_code_hoist_vbeinout (void);
592 static void compute_code_hoist_data (void);
593 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *);
594 static void hoist_code (void);
595 static int one_code_hoisting_pass (void);
596 static rtx process_insert_insn (struct expr *);
597 static int pre_edge_insert (struct edge_list *, struct expr **);
598 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
599 basic_block, char *);
600 static struct ls_expr * ldst_entry (rtx);
601 static void free_ldst_entry (struct ls_expr *);
602 static void free_ldst_mems (void);
603 static void print_ldst_list (FILE *);
604 static struct ls_expr * find_rtx_in_ldst (rtx);
605 static int enumerate_ldsts (void);
606 static inline struct ls_expr * first_ls_expr (void);
607 static inline struct ls_expr * next_ls_expr (struct ls_expr *);
608 static int simple_mem (const_rtx);
609 static void invalidate_any_buried_refs (rtx);
610 static void compute_ld_motion_mems (void);
611 static void trim_ld_motion_mems (void);
612 static void update_ld_motion_stores (struct expr *);
613 static void reg_set_info (rtx, const_rtx, void *);
614 static void reg_clear_last_set (rtx, const_rtx, void *);
615 static bool store_ops_ok (const_rtx, int *);
616 static rtx extract_mentioned_regs (rtx);
617 static rtx extract_mentioned_regs_helper (rtx, rtx);
618 static void find_moveable_store (rtx, int *, int *);
619 static int compute_store_table (void);
620 static bool load_kills_store (const_rtx, const_rtx, int);
621 static bool find_loads (const_rtx, const_rtx, int);
622 static bool store_killed_in_insn (const_rtx, const_rtx, const_rtx, int);
623 static bool store_killed_after (const_rtx, const_rtx, const_rtx, const_basic_block, int *, rtx *);
624 static bool store_killed_before (const_rtx, const_rtx, const_rtx, const_basic_block, int *);
625 static void build_store_vectors (void);
626 static void insert_insn_start_basic_block (rtx, basic_block);
627 static int insert_store (struct ls_expr *, edge);
628 static void remove_reachable_equiv_notes (basic_block, struct ls_expr *);
629 static void replace_store_insn (rtx, rtx, basic_block, struct ls_expr *);
630 static void delete_store (struct ls_expr *, basic_block);
631 static void free_store_memory (void);
632 static void store_motion (void);
633 static void free_insn_expr_list_list (rtx *);
634 static void clear_modify_mem_tables (void);
635 static void free_modify_mem_tables (void);
636 static rtx gcse_emit_move_after (rtx, rtx, rtx);
637 static void local_cprop_find_used_regs (rtx *, void *);
638 static bool do_local_cprop (rtx, rtx, bool, rtx*);
639 static bool adjust_libcall_notes (rtx, rtx, rtx, rtx*);
640 static void local_cprop_pass (bool);
641 static bool is_too_expensive (const char *);
644 /* Entry point for global common subexpression elimination.
645 F is the first instruction in the function. Return nonzero if a
646    change is made. */
648 static int
649 gcse_main (rtx f ATTRIBUTE_UNUSED)
651 int changed, pass;
652 /* Bytes used at start of pass. */
653 int initial_bytes_used;
654 /* Maximum number of bytes used by a pass. */
655 int max_pass_bytes;
656 /* Point to release obstack data from for each pass. */
657 char *gcse_obstack_bottom;
659 /* We do not construct an accurate cfg in functions which call
660 setjmp, so just punt to be safe. */
661 if (cfun->calls_setjmp)
662 return 0;
664 /* Assume that we do not need to run jump optimizations after gcse. */
665 run_jump_opt_after_gcse = 0;
667 /* Identify the basic block information for this function, including
668 successors and predecessors. */
669 max_gcse_regno = max_reg_num ();
671 df_note_add_problem ();
672 df_analyze ();
674 if (dump_file)
675 dump_flow_info (dump_file, dump_flags);
677 /* Return if there's nothing to do, or it is too expensive. */
678 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
679 || is_too_expensive (_("GCSE disabled")))
680 return 0;
682 gcc_obstack_init (&gcse_obstack);
683 bytes_used = 0;
685 /* We need alias. */
686 init_alias_analysis ();
687 /* Record where pseudo-registers are set. This data is kept accurate
688 during each pass. ??? We could also record hard-reg information here
689 [since it's unchanging], however it is currently done during hash table
690 computation.
692 It may be tempting to compute MEM set information here too, but MEM sets
693 will be subject to code motion one day and thus we need to compute
694 information about memory sets when we build the hash tables. */
696 alloc_reg_set_mem (max_gcse_regno);
697 compute_sets ();
699 pass = 0;
700 initial_bytes_used = bytes_used;
701 max_pass_bytes = 0;
702 gcse_obstack_bottom = gcse_alloc (1);
703 changed = 1;
704 while (changed && pass < MAX_GCSE_PASSES)
706 changed = 0;
707 if (dump_file)
708 fprintf (dump_file, "GCSE pass %d\n\n", pass + 1);
710 /* Initialize bytes_used to the space for the pred/succ lists,
711 and the reg_set_table data. */
712 bytes_used = initial_bytes_used;
714 /* Each pass may create new registers, so recalculate each time. */
715 max_gcse_regno = max_reg_num ();
717 alloc_gcse_mem ();
719 /* Don't allow constant propagation to modify jumps
720 during this pass. */
721 if (dbg_cnt (cprop1))
723 timevar_push (TV_CPROP1);
724 changed = one_cprop_pass (pass + 1, false, false);
725 timevar_pop (TV_CPROP1);
728 if (optimize_size)
729 /* Do nothing. */ ;
730 else
732 timevar_push (TV_PRE);
733 changed |= one_pre_gcse_pass (pass + 1);
734 /* We may have just created new basic blocks. Release and
735 recompute various things which are sized on the number of
736 basic blocks. */
737 if (changed)
739 free_modify_mem_tables ();
740 modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
741 canon_modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
743 free_reg_set_mem ();
744 alloc_reg_set_mem (max_reg_num ());
745 compute_sets ();
746 run_jump_opt_after_gcse = 1;
747 timevar_pop (TV_PRE);
750 if (max_pass_bytes < bytes_used)
751 max_pass_bytes = bytes_used;
753 /* Free up memory, then reallocate for code hoisting. We can
754 not re-use the existing allocated memory because the tables
755 will not have info for the insns or registers created by
756 partial redundancy elimination. */
757 free_gcse_mem ();
759 /* It does not make sense to run code hoisting unless we are optimizing
760 for code size -- it rarely makes programs faster, and can make
761 them bigger if we did partial redundancy elimination (when optimizing
762 for space, we don't run the partial redundancy algorithms). */
763 if (optimize_size)
765 timevar_push (TV_HOIST);
766 max_gcse_regno = max_reg_num ();
767 alloc_gcse_mem ();
768 changed |= one_code_hoisting_pass ();
769 free_gcse_mem ();
771 if (max_pass_bytes < bytes_used)
772 max_pass_bytes = bytes_used;
773 timevar_pop (TV_HOIST);
776 if (dump_file)
778 fprintf (dump_file, "\n");
779 fflush (dump_file);
782 obstack_free (&gcse_obstack, gcse_obstack_bottom);
783 pass++;
786 /* Do one last pass of copy propagation, including cprop into
787 conditional jumps. */
789 if (dbg_cnt (cprop2))
791 max_gcse_regno = max_reg_num ();
792 alloc_gcse_mem ();
794 /* This time, go ahead and allow cprop to alter jumps. */
795 timevar_push (TV_CPROP2);
796 one_cprop_pass (pass + 1, true, true);
797 timevar_pop (TV_CPROP2);
798 free_gcse_mem ();
801 if (dump_file)
803 fprintf (dump_file, "GCSE of %s: %d basic blocks, ",
804 current_function_name (), n_basic_blocks);
805 fprintf (dump_file, "%d pass%s, %d bytes\n\n",
806 pass, pass > 1 ? "es" : "", max_pass_bytes);
809 obstack_free (&gcse_obstack, NULL);
810 free_reg_set_mem ();
812 /* We are finished with alias. */
813 end_alias_analysis ();
815 if (!optimize_size && flag_gcse_sm)
817 timevar_push (TV_LSM);
818 store_motion ();
819 timevar_pop (TV_LSM);
822 /* Record where pseudo-registers are set. */
823 return run_jump_opt_after_gcse;
826 /* Misc. utilities. */
828 /* Nonzero for each mode that supports (set (reg) (reg)).
829 This is trivially true for integer and floating point values.
830 It may or may not be true for condition codes. */
831 static char can_copy[(int) NUM_MACHINE_MODES];
833 /* Compute which modes support reg/reg copy operations. */
835 static void
836 compute_can_copy (void)
838 int i;
839 #ifndef AVOID_CCMODE_COPIES
840 rtx reg, insn;
841 #endif
842 memset (can_copy, 0, NUM_MACHINE_MODES);
844 start_sequence ();
845 for (i = 0; i < NUM_MACHINE_MODES; i++)
846 if (GET_MODE_CLASS (i) == MODE_CC)
848 #ifdef AVOID_CCMODE_COPIES
849 can_copy[i] = 0;
850 #else
851 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
852 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
853 if (recog (PATTERN (insn), insn, NULL) >= 0)
854 can_copy[i] = 1;
855 #endif
857 else
858 can_copy[i] = 1;
860 end_sequence ();
863 /* Returns whether the mode supports reg/reg copy operations. */
865 bool
866 can_copy_p (enum machine_mode mode)
868 static bool can_copy_init_p = false;
870 if (! can_copy_init_p)
872 compute_can_copy ();
873 can_copy_init_p = true;
876 return can_copy[mode] != 0;
879 /* Cover function to xmalloc to record bytes allocated. */
881 static void *
882 gmalloc (size_t size)
884 bytes_used += size;
885 return xmalloc (size);
888 /* Cover function to xcalloc to record bytes allocated. */
890 static void *
891 gcalloc (size_t nelem, size_t elsize)
893 bytes_used += nelem * elsize;
894 return xcalloc (nelem, elsize);
897 /* Cover function to xrealloc.
898 We don't record the additional size since we don't know it.
899 It won't affect memory usage stats much anyway. */
901 static void *
902 grealloc (void *ptr, size_t size)
904 return xrealloc (ptr, size);
907 /* Cover function to obstack_alloc. */
909 static void *
910 gcse_alloc (unsigned long size)
912 bytes_used += size;
913 return obstack_alloc (&gcse_obstack, size);
916 /* Allocate memory for the cuid mapping array,
917 and reg/memory set tracking tables.
919 This is called at the start of each pass. */
921 static void
922 alloc_gcse_mem (void)
924 int i;
925 basic_block bb;
926 rtx insn;
928 /* Find the largest UID and create a mapping from UIDs to CUIDs.
929 CUIDs are like UIDs except they increase monotonically, have no gaps,
930 and only apply to real insns.
931      (Actually, there are gaps, for insns that are not inside a basic block,
932 but we should never see those anyway, so this is OK.) */
934 max_uid = get_max_uid ();
935 uid_cuid = gcalloc (max_uid + 1, sizeof (int));
936 i = 0;
937 FOR_EACH_BB (bb)
938 FOR_BB_INSNS (bb, insn)
940 if (INSN_P (insn))
941 uid_cuid[INSN_UID (insn)] = i++;
942 else
943 uid_cuid[INSN_UID (insn)] = i;
946 max_cuid = i;
948 /* Allocate vars to track sets of regs. */
949 reg_set_bitmap = BITMAP_ALLOC (NULL);
951 /* Allocate vars to track sets of regs, memory per block. */
952 reg_set_in_block = sbitmap_vector_alloc (last_basic_block, max_gcse_regno);
953 /* Allocate array to keep a list of insns which modify memory in each
954 basic block. */
955 modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
956 canon_modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
957 modify_mem_list_set = BITMAP_ALLOC (NULL);
958 blocks_with_calls = BITMAP_ALLOC (NULL);
961 /* Free memory allocated by alloc_gcse_mem. */
963 static void
964 free_gcse_mem (void)
966 free (uid_cuid);
968 BITMAP_FREE (reg_set_bitmap);
970 sbitmap_vector_free (reg_set_in_block);
971 free_modify_mem_tables ();
972 BITMAP_FREE (modify_mem_list_set);
973 BITMAP_FREE (blocks_with_calls);
976 /* Compute the local properties of each recorded expression.
978 Local properties are those that are defined by the block, irrespective of
979 other blocks.
981 An expression is transparent in a block if its operands are not modified
982 in the block.
984 An expression is computed (locally available) in a block if it is computed
985    at least once and the expression would contain the same value if the
986 computation was moved to the end of the block.
988 An expression is locally anticipatable in a block if it is computed at
989    least once and the expression would contain the same value if the computation
990 was moved to the beginning of the block.
992 We call this routine for cprop, pre and code hoisting. They all compute
993 basically the same information and thus can easily share this code.
995 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
996 properties. If NULL, then it is not necessary to compute or record that
997 particular property.
999    TABLE controls which hash table to look at. If it is the set hash table,
1000 additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1001 ABSALTERED. */
1003 static void
1004 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
1005 struct hash_table *table)
1007 unsigned int i;
1009 /* Initialize any bitmaps that were passed in. */
1010 if (transp)
1012 if (table->set_p)
1013 sbitmap_vector_zero (transp, last_basic_block);
1014 else
1015 sbitmap_vector_ones (transp, last_basic_block);
1018 if (comp)
1019 sbitmap_vector_zero (comp, last_basic_block);
1020 if (antloc)
1021 sbitmap_vector_zero (antloc, last_basic_block);
1023 for (i = 0; i < table->size; i++)
1025 struct expr *expr;
1027 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1029 int indx = expr->bitmap_index;
1030 struct occr *occr;
1032 /* The expression is transparent in this block if it is not killed.
1033 We start by assuming all are transparent [none are killed], and
1034 then reset the bits for those that are. */
1035 if (transp)
1036 compute_transp (expr->expr, indx, transp, table->set_p);
1038 /* The occurrences recorded in antic_occr are exactly those that
1039 we want to set to nonzero in ANTLOC. */
1040 if (antloc)
1041 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1043 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1045 /* While we're scanning the table, this is a good place to
1046 initialize this. */
1047 occr->deleted_p = 0;
1050 /* The occurrences recorded in avail_occr are exactly those that
1051 we want to set to nonzero in COMP. */
1052 if (comp)
1053 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1055 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1057 /* While we're scanning the table, this is a good place to
1058 initialize this. */
1059 occr->copied_p = 0;
1062 /* While we're scanning the table, this is a good place to
1063 initialize this. */
1064 expr->reaching_reg = 0;
1069 /* Register set information.
1071 `reg_set_table' records where each register is set or otherwise
1072 modified. */
1074 static struct obstack reg_set_obstack;
1076 static void
1077 alloc_reg_set_mem (int n_regs)
1079 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1080 reg_set_table = gcalloc (reg_set_table_size, sizeof (struct reg_set *));
1082 gcc_obstack_init (&reg_set_obstack);
1085 static void
1086 free_reg_set_mem (void)
1088 free (reg_set_table);
1089 obstack_free (&reg_set_obstack, NULL);
1092 /* Record REGNO in the reg_set table. */
1094 static void
1095 record_one_set (int regno, rtx insn)
1097 /* Allocate a new reg_set element and link it onto the list. */
1098 struct reg_set *new_reg_info;
1100 /* If the table isn't big enough, enlarge it. */
1101 if (regno >= reg_set_table_size)
1103 int new_size = regno + REG_SET_TABLE_SLOP;
1105 reg_set_table = grealloc (reg_set_table,
1106 new_size * sizeof (struct reg_set *));
1107 memset (reg_set_table + reg_set_table_size, 0,
1108 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1109 reg_set_table_size = new_size;
1112 new_reg_info = obstack_alloc (&reg_set_obstack, sizeof (struct reg_set));
1113 bytes_used += sizeof (struct reg_set);
1114 new_reg_info->bb_index = BLOCK_NUM (insn);
1115 new_reg_info->next = reg_set_table[regno];
1116 reg_set_table[regno] = new_reg_info;
1119 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1120 an insn. The DATA is really the instruction in which the SET is
1121 occurring. */
1123 static void
1124 record_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1126 rtx record_set_insn = (rtx) data;
1128 if (REG_P (dest) && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1129 record_one_set (REGNO (dest), record_set_insn);
1132 /* Scan the function and record each set of each pseudo-register.
1134 This is called once, at the start of the gcse pass. See the comments for
1135 `reg_set_table' for further documentation. */
1137 static void
1138 compute_sets (void)
1140 basic_block bb;
1141 rtx insn;
1143 FOR_EACH_BB (bb)
1144 FOR_BB_INSNS (bb, insn)
1145 if (INSN_P (insn))
1146 note_stores (PATTERN (insn), record_set_info, insn);
1149 /* Hash table support. */
1151 struct reg_avail_info
1153 basic_block last_bb;
1154 int first_set;
1155 int last_set;
1158 static struct reg_avail_info *reg_avail_info;
1159 static basic_block current_bb;
1162 /* See whether X, the source of a set, is something we want to consider for
1163 GCSE. */
1165 static int
1166 want_to_gcse_p (rtx x)
1168 #ifdef STACK_REGS
1169 /* On register stack architectures, don't GCSE constants from the
1170 constant pool, as the benefits are often swamped by the overhead
1171 of shuffling the register stack between basic blocks. */
1172 if (IS_STACK_MODE (GET_MODE (x)))
1173 x = avoid_constant_pool_reference (x);
1174 #endif
1176 switch (GET_CODE (x))
1178 case REG:
1179 case SUBREG:
1180 case CONST_INT:
1181 case CONST_DOUBLE:
1182 case CONST_FIXED:
1183 case CONST_VECTOR:
1184 case CALL:
1185 return 0;
1187 default:
1188 return can_assign_to_reg_p (x);
1192 /* Used internally by can_assign_to_reg_p. */
1194 static GTY(()) rtx test_insn;
1196 /* Return true if we can assign X to a pseudo register. */
1198 static bool
1199 can_assign_to_reg_p (rtx x)
1201 int num_clobbers = 0;
1202 int icode;
1204 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
1205 if (general_operand (x, GET_MODE (x)))
1206 return 1;
1207 else if (GET_MODE (x) == VOIDmode)
1208 return 0;
1210 /* Otherwise, check if we can make a valid insn from it. First initialize
1211 our test insn if we haven't already. */
1212 if (test_insn == 0)
1214 test_insn
1215 = make_insn_raw (gen_rtx_SET (VOIDmode,
1216 gen_rtx_REG (word_mode,
1217 FIRST_PSEUDO_REGISTER * 2),
1218 const0_rtx));
1219 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
1222 /* Now make an insn like the one we would make when GCSE'ing and see if
1223      it is valid. */
1224 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
1225 SET_SRC (PATTERN (test_insn)) = x;
1226 return ((icode = recog (PATTERN (test_insn), test_insn, &num_clobbers)) >= 0
1227 && (num_clobbers == 0 || ! added_clobbers_hard_reg_p (icode)));
1230 /* Return nonzero if the operands of expression X are unchanged from the
1231 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1232 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1234 static int
1235 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
1237 int i, j;
1238 enum rtx_code code;
1239 const char *fmt;
1241 if (x == 0)
1242 return 1;
1244 code = GET_CODE (x);
1245 switch (code)
1247 case REG:
1249 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
1251 if (info->last_bb != current_bb)
1252 return 1;
1253 if (avail_p)
1254 return info->last_set < INSN_CUID (insn);
1255 else
1256 return info->first_set >= INSN_CUID (insn);
1259 case MEM:
1260 if (load_killed_in_block_p (current_bb, INSN_CUID (insn),
1261 x, avail_p))
1262 return 0;
1263 else
1264 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1266 case PRE_DEC:
1267 case PRE_INC:
1268 case POST_DEC:
1269 case POST_INC:
1270 case PRE_MODIFY:
1271 case POST_MODIFY:
1272 return 0;
1274 case PC:
1275 case CC0: /*FIXME*/
1276 case CONST:
1277 case CONST_INT:
1278 case CONST_DOUBLE:
1279 case CONST_FIXED:
1280 case CONST_VECTOR:
1281 case SYMBOL_REF:
1282 case LABEL_REF:
1283 case ADDR_VEC:
1284 case ADDR_DIFF_VEC:
1285 return 1;
1287 default:
1288 break;
1291 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1293 if (fmt[i] == 'e')
1295 /* If we are about to do the last recursive call needed at this
1296 level, change it into iteration. This function is called enough
1297 to be worth it. */
1298 if (i == 0)
1299 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1301 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1302 return 0;
1304 else if (fmt[i] == 'E')
1305 for (j = 0; j < XVECLEN (x, i); j++)
1306 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1307 return 0;
1310 return 1;
1313 /* Used for communication between mems_conflict_for_gcse_p and
1314 load_killed_in_block_p. Nonzero if mems_conflict_for_gcse_p finds a
1315 conflict between two memory references. */
1316 static int gcse_mems_conflict_p;
1318 /* Used for communication between mems_conflict_for_gcse_p and
1319 load_killed_in_block_p. A memory reference for a load instruction,
1320 mems_conflict_for_gcse_p will see if a memory store conflicts with
1321 this memory load. */
1322 static const_rtx gcse_mem_operand;
1324 /* DEST is the output of an instruction. If it is a memory reference, and
1325 possibly conflicts with the load found in gcse_mem_operand, then set
1326 gcse_mems_conflict_p to a nonzero value. */
1328 static void
1329 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
1330 void *data ATTRIBUTE_UNUSED)
1332 while (GET_CODE (dest) == SUBREG
1333 || GET_CODE (dest) == ZERO_EXTRACT
1334 || GET_CODE (dest) == STRICT_LOW_PART)
1335 dest = XEXP (dest, 0);
1337 /* If DEST is not a MEM, then it will not conflict with the load. Note
1338 that function calls are assumed to clobber memory, but are handled
1339 elsewhere. */
1340 if (! MEM_P (dest))
1341 return;
1343 /* If we are setting a MEM in our list of specially recognized MEMs,
1344 don't mark as killed this time. */
1346 if (expr_equiv_p (dest, gcse_mem_operand) && pre_ldst_mems != NULL)
1348 if (!find_rtx_in_ldst (dest))
1349 gcse_mems_conflict_p = 1;
1350 return;
1353 if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
1354 rtx_addr_varies_p))
1355 gcse_mems_conflict_p = 1;
1358 /* Return nonzero if the expression in X (a memory reference) is killed
1359 in block BB before or after the insn with the CUID in UID_LIMIT.
1360 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
1361 before UID_LIMIT.
1363 To check the entire block, set UID_LIMIT to max_uid + 1 and
1364 AVAIL_P to 0. */
1366 static int
1367 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, int avail_p)
1369 rtx list_entry = modify_mem_list[bb->index];
1371   /* If this is a read-only MEM, then we aren't going to be changing it. */
1372 if (MEM_READONLY_P (x))
1373 return 0;
1375 while (list_entry)
1377 rtx setter;
1378 /* Ignore entries in the list that do not apply. */
1379 if ((avail_p
1380 && INSN_CUID (XEXP (list_entry, 0)) < uid_limit)
1381 || (! avail_p
1382 && INSN_CUID (XEXP (list_entry, 0)) > uid_limit))
1384 list_entry = XEXP (list_entry, 1);
1385 continue;
1388 setter = XEXP (list_entry, 0);
1390 /* If SETTER is a call everything is clobbered. Note that calls
1391 to pure functions are never put on the list, so we need not
1392 worry about them. */
1393 if (CALL_P (setter))
1394 return 1;
1396 /* SETTER must be an INSN of some kind that sets memory. Call
1397 note_stores to examine each hunk of memory that is modified.
1399 The note_stores interface is pretty limited, so we have to
1400 communicate via global variables. Yuk. */
1401 gcse_mem_operand = x;
1402 gcse_mems_conflict_p = 0;
1403 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
1404 if (gcse_mems_conflict_p)
1405 return 1;
1406 list_entry = XEXP (list_entry, 1);
1408 return 0;
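/* Illustrative usage (a hypothetical wrapper, not part of this file):
   as the comment above load_killed_in_block_p notes, checking whether
   the load X is killed anywhere in BB amounts to passing a CUID limit
   past the last insn and an AVAIL_P of zero:

     static int
     load_killed_anywhere_in_block_p (const_basic_block bb, const_rtx x)
     {
       return load_killed_in_block_p (bb, max_uid + 1, x, 0);
     }  */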
1411 /* Return nonzero if the operands of expression X are unchanged from
1412 the start of INSN's basic block up to but not including INSN. */
1414 static int
1415 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1417 return oprs_unchanged_p (x, insn, 0);
1420 /* Return nonzero if the operands of expression X are unchanged from
1421 INSN to the end of INSN's basic block. */
1423 static int
1424 oprs_available_p (const_rtx x, const_rtx insn)
1426 return oprs_unchanged_p (x, insn, 1);
1429 /* Hash expression X.
1431 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1432 indicating if a volatile operand is found or if the expression contains
1433 something we don't want to insert in the table. HASH_TABLE_SIZE is
1434 the current size of the hash table to be probed. */
1436 static unsigned int
1437 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1438 int hash_table_size)
1440 unsigned int hash;
1442 *do_not_record_p = 0;
1444 hash = hash_rtx (x, mode, do_not_record_p,
1445 NULL, /*have_reg_qty=*/false);
1446 return hash % hash_table_size;
1449 /* Hash a set of register REGNO.
1451 Sets are hashed on the register that is set. This simplifies the PRE copy
1452 propagation code.
1454 ??? May need to make things more elaborate. Later, as necessary. */
1456 static unsigned int
1457 hash_set (int regno, int hash_table_size)
1459 unsigned int hash;
1461 hash = regno;
1462 return hash % hash_table_size;
1465 /* Return nonzero if exp1 is equivalent to exp2. */
1467 static int
1468 expr_equiv_p (const_rtx x, const_rtx y)
1470 return exp_equiv_p (x, y, 0, true);
1473 /* Insert expression X in INSN in the hash TABLE.
1474 If it is already present, record it as the last occurrence in INSN's
1475 basic block.
1477 MODE is the mode of the value X is being stored into.
1478 It is only used if X is a CONST_INT.
1480 ANTIC_P is nonzero if X is an anticipatable expression.
1481 AVAIL_P is nonzero if X is an available expression. */
1483 static void
1484 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1485 int avail_p, struct hash_table *table)
1487 int found, do_not_record_p;
1488 unsigned int hash;
1489 struct expr *cur_expr, *last_expr = NULL;
1490 struct occr *antic_occr, *avail_occr;
1492 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1494 /* Do not insert expression in table if it contains volatile operands,
1495 or if hash_expr determines the expression is something we don't want
1496 to or can't handle. */
1497 if (do_not_record_p)
1498 return;
1500 cur_expr = table->table[hash];
1501 found = 0;
1503 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1505 /* If the expression isn't found, save a pointer to the end of
1506 the list. */
1507 last_expr = cur_expr;
1508 cur_expr = cur_expr->next_same_hash;
1511 if (! found)
1513 cur_expr = gcse_alloc (sizeof (struct expr));
1514 bytes_used += sizeof (struct expr);
1515 if (table->table[hash] == NULL)
1516 /* This is the first pattern that hashed to this index. */
1517 table->table[hash] = cur_expr;
1518 else
1519 /* Add EXPR to end of this hash chain. */
1520 last_expr->next_same_hash = cur_expr;
1522 /* Set the fields of the expr element. */
1523 cur_expr->expr = x;
1524 cur_expr->bitmap_index = table->n_elems++;
1525 cur_expr->next_same_hash = NULL;
1526 cur_expr->antic_occr = NULL;
1527 cur_expr->avail_occr = NULL;
1530 /* Now record the occurrence(s). */
1531 if (antic_p)
1533 antic_occr = cur_expr->antic_occr;
1535 if (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1536 antic_occr = NULL;
1538 if (antic_occr)
1539 /* Found another instance of the expression in the same basic block.
1540 Prefer the currently recorded one. We want the first one in the
1541 block and the block is scanned from start to end. */
1542 ; /* nothing to do */
1543 else
1545 /* First occurrence of this expression in this basic block. */
1546 antic_occr = gcse_alloc (sizeof (struct occr));
1547 bytes_used += sizeof (struct occr);
1548 antic_occr->insn = insn;
1549 antic_occr->next = cur_expr->antic_occr;
1550 antic_occr->deleted_p = 0;
1551 cur_expr->antic_occr = antic_occr;
1555 if (avail_p)
1557 avail_occr = cur_expr->avail_occr;
1559 if (avail_occr && BLOCK_NUM (avail_occr->insn) == BLOCK_NUM (insn))
1561 /* Found another instance of the expression in the same basic block.
1562 Prefer this occurrence to the currently recorded one. We want
1563 the last one in the block and the block is scanned from start
1564 to end. */
1565 avail_occr->insn = insn;
1567 else
1569 /* First occurrence of this expression in this basic block. */
1570 avail_occr = gcse_alloc (sizeof (struct occr));
1571 bytes_used += sizeof (struct occr);
1572 avail_occr->insn = insn;
1573 avail_occr->next = cur_expr->avail_occr;
1574 avail_occr->deleted_p = 0;
1575 cur_expr->avail_occr = avail_occr;
1580 /* Insert pattern X in INSN in the hash table.
1581 X is a SET of a reg to either another reg or a constant.
1582 If it is already present, record it as the last occurrence in INSN's
1583 basic block. */
1585 static void
1586 insert_set_in_table (rtx x, rtx insn, struct hash_table *table)
1588 int found;
1589 unsigned int hash;
1590 struct expr *cur_expr, *last_expr = NULL;
1591 struct occr *cur_occr;
1593 gcc_assert (GET_CODE (x) == SET && REG_P (SET_DEST (x)));
1595 hash = hash_set (REGNO (SET_DEST (x)), table->size);
1597 cur_expr = table->table[hash];
1598 found = 0;
1600 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1602 /* If the expression isn't found, save a pointer to the end of
1603 the list. */
1604 last_expr = cur_expr;
1605 cur_expr = cur_expr->next_same_hash;
1608 if (! found)
1610 cur_expr = gcse_alloc (sizeof (struct expr));
1611 bytes_used += sizeof (struct expr);
1612 if (table->table[hash] == NULL)
1613 /* This is the first pattern that hashed to this index. */
1614 table->table[hash] = cur_expr;
1615 else
1616 /* Add EXPR to end of this hash chain. */
1617 last_expr->next_same_hash = cur_expr;
1619 /* Set the fields of the expr element.
1620 We must copy X because it can be modified when copy propagation is
1621 performed on its operands. */
1622 cur_expr->expr = copy_rtx (x);
1623 cur_expr->bitmap_index = table->n_elems++;
1624 cur_expr->next_same_hash = NULL;
1625 cur_expr->antic_occr = NULL;
1626 cur_expr->avail_occr = NULL;
1629 /* Now record the occurrence. */
1630 cur_occr = cur_expr->avail_occr;
1632 if (cur_occr && BLOCK_NUM (cur_occr->insn) == BLOCK_NUM (insn))
1634 /* Found another instance of the expression in the same basic block.
1635 Prefer this occurrence to the currently recorded one. We want
1636 the last one in the block and the block is scanned from start
1637 to end. */
1638 cur_occr->insn = insn;
1640 else
1642 /* First occurrence of this expression in this basic block. */
1643 cur_occr = gcse_alloc (sizeof (struct occr));
1644 bytes_used += sizeof (struct occr);
1646 cur_occr->insn = insn;
1647 cur_occr->next = cur_expr->avail_occr;
1648 cur_occr->deleted_p = 0;
1649 cur_expr->avail_occr = cur_occr;
1653 /* Determine whether the rtx X should be treated as a constant for
1654 the purposes of GCSE's constant propagation. */
1656 static bool
1657 gcse_constant_p (const_rtx x)
1659 /* Consider a COMPARE of two integers constant. */
1660 if (GET_CODE (x) == COMPARE
1661 && GET_CODE (XEXP (x, 0)) == CONST_INT
1662 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1663 return true;
1665   /* Consider a COMPARE of the same registers a constant
1666 if they are not floating point registers. */
1667 if (GET_CODE(x) == COMPARE
1668 && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))
1669 && REGNO (XEXP (x, 0)) == REGNO (XEXP (x, 1))
1670 && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 0)))
1671 && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 1))))
1672 return true;
1674 return CONSTANT_P (x);
1677 /* Scan pattern PAT of INSN and add an entry to the hash TABLE (set or
1678 expression one). */
1680 static void
1681 hash_scan_set (rtx pat, rtx insn, struct hash_table *table)
1683 rtx src = SET_SRC (pat);
1684 rtx dest = SET_DEST (pat);
1685 rtx note;
1687 if (GET_CODE (src) == CALL)
1688 hash_scan_call (src, insn, table);
1690 else if (REG_P (dest))
1692 unsigned int regno = REGNO (dest);
1693 rtx tmp;
1695       /* See if a REG_EQUAL note shows this to be equivalent to a simpler expression.
1697 This allows us to do a single GCSE pass and still eliminate
1698 redundant constants, addresses or other expressions that are
1699 constructed with multiple instructions.
1701 	 However, keep the original SRC if INSN is a simple reg-reg move.
1702 In this case, there will almost always be a REG_EQUAL note on the
1703 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1704 for INSN, we miss copy propagation opportunities and we perform the
1705 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1706 do more than one PRE GCSE pass.
1708 	 Note that this does not impede profitable constant propagations. We
1709 "look through" reg-reg sets in lookup_avail_set. */
1710 note = find_reg_equal_equiv_note (insn);
1711 if (note != 0
1712 && REG_NOTE_KIND (note) == REG_EQUAL
1713 && !REG_P (src)
1714 && (table->set_p
1715 ? gcse_constant_p (XEXP (note, 0))
1716 : want_to_gcse_p (XEXP (note, 0))))
1717 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
1719 /* Only record sets of pseudo-regs in the hash table. */
1720 if (! table->set_p
1721 && regno >= FIRST_PSEUDO_REGISTER
1722 /* Don't GCSE something if we can't do a reg/reg copy. */
1723 && can_copy_p (GET_MODE (dest))
1724 	  /* GCSE commonly inserts instructions after the insn. We can't
1725 do that easily for EH_REGION notes so disable GCSE on these
1726 for now. */
1727 && !find_reg_note (insn, REG_EH_REGION, NULL_RTX)
1728 /* Is SET_SRC something we want to gcse? */
1729 && want_to_gcse_p (src)
1730 /* Don't CSE a nop. */
1731 && ! set_noop_p (pat)
1732 /* Don't GCSE if it has attached REG_EQUIV note.
1733 	     At this point, only function parameters should have
1734 REG_EQUIV notes and if the argument slot is used somewhere
1735 explicitly, it means address of parameter has been taken,
1736 so we should not extend the lifetime of the pseudo. */
1737 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1739 /* An expression is not anticipatable if its operands are
1740 modified before this insn or if this is not the only SET in
1741 this insn. The latter condition does not have to mean that
1742 SRC itself is not anticipatable, but we just will not be
1743 able to handle code motion of insns with multiple sets. */
1744 int antic_p = oprs_anticipatable_p (src, insn)
1745 && !multiple_sets (insn);
1746 /* An expression is not available if its operands are
1747 subsequently modified, including this insn. It's also not
1748 available if this is a branch, because we can't insert
1749 a set after the branch. */
1750 int avail_p = (oprs_available_p (src, insn)
1751 && ! JUMP_P (insn));
1753 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table);
1756 /* Record sets for constant/copy propagation. */
1757 else if (table->set_p
1758 && regno >= FIRST_PSEUDO_REGISTER
1759 && ((REG_P (src)
1760 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1761 && can_copy_p (GET_MODE (dest))
1762 && REGNO (src) != regno)
1763 || gcse_constant_p (src))
1764 /* A copy is not available if its src or dest is subsequently
1765 modified. Here we want to search from INSN+1 on, but
1766 oprs_available_p searches from INSN on. */
1767 && (insn == BB_END (BLOCK_FOR_INSN (insn))
1768 || (tmp = next_nonnote_insn (insn)) == NULL_RTX
1769 || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn)
1770 || oprs_available_p (pat, tmp)))
1771 insert_set_in_table (pat, insn, table);
1773 /* In case of a store we want to consider the memory value as available in
1774 the REG stored in that memory. This makes it possible to remove
1775 redundant loads due to stores to the same location. */
1776 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1778 unsigned int regno = REGNO (src);
1780 /* Do not do this for constant/copy propagation. */
1781 if (! table->set_p
1782 /* Only record sets of pseudo-regs in the hash table. */
1783 && regno >= FIRST_PSEUDO_REGISTER
1784 /* Don't GCSE something if we can't do a reg/reg copy. */
1785 && can_copy_p (GET_MODE (src))
1786 /* GCSE commonly inserts instructions after the insn. We can't
1787 do that easily for EH_REGION notes so disable GCSE on these
1788 for now. */
1789 && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
1790 /* Is SET_DEST something we want to gcse? */
1791 && want_to_gcse_p (dest)
1792 /* Don't CSE a nop. */
1793 && ! set_noop_p (pat)
1794 /* Don't GCSE if it has an attached REG_EQUIV note.
1795 At this point, only function parameters should have
1796 REG_EQUIV notes, and if the argument slot is used somewhere
1797 explicitly, it means the address of the parameter has been taken,
1798 so we should not extend the lifetime of the pseudo. */
1799 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1800 || ! MEM_P (XEXP (note, 0))))
1802 /* Stores are never anticipatable. */
1803 int antic_p = 0;
1804 /* An expression is not available if its operands are
1805 subsequently modified, including this insn. It's also not
1806 available if this is a branch, because we can't insert
1807 a set after the branch. */
1808 int avail_p = oprs_available_p (dest, insn)
1809 && ! JUMP_P (insn);
1811 /* Record the memory expression (DEST) in the hash table. */
1812 insert_expr_in_table (dest, GET_MODE (dest), insn,
1813 antic_p, avail_p, table);
1818 static void
1819 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1820 struct hash_table *table ATTRIBUTE_UNUSED)
1822 /* Currently nothing to do. */
1825 static void
1826 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1827 struct hash_table *table ATTRIBUTE_UNUSED)
1829 /* Currently nothing to do. */
1832 /* Process INSN and add hash table entries as appropriate.
1834 Only available expressions that set a single pseudo-reg are recorded.
1836 Single sets in a PARALLEL could be handled, but it's an extra complication
1837 that isn't dealt with right now. The trick is handling the CLOBBERs that
1838 are also in the PARALLEL. Later.
1840 If SET_P is nonzero, this is for the assignment hash table,
1841 otherwise it is for the expression hash table.
1842 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should
1843 not record any expressions. */
1845 static void
1846 hash_scan_insn (rtx insn, struct hash_table *table, int in_libcall_block)
1848 rtx pat = PATTERN (insn);
1849 int i;
1851 if (in_libcall_block)
1852 return;
1854 /* Pick out the sets of INSN and for other forms of instructions record
1855 what's been modified. */
1857 if (GET_CODE (pat) == SET)
1858 hash_scan_set (pat, insn, table);
1859 else if (GET_CODE (pat) == PARALLEL)
1860 for (i = 0; i < XVECLEN (pat, 0); i++)
1862 rtx x = XVECEXP (pat, 0, i);
1864 if (GET_CODE (x) == SET)
1865 hash_scan_set (x, insn, table);
1866 else if (GET_CODE (x) == CLOBBER)
1867 hash_scan_clobber (x, insn, table);
1868 else if (GET_CODE (x) == CALL)
1869 hash_scan_call (x, insn, table);
1872 else if (GET_CODE (pat) == CLOBBER)
1873 hash_scan_clobber (pat, insn, table);
1874 else if (GET_CODE (pat) == CALL)
1875 hash_scan_call (pat, insn, table);
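/* Dump the contents of hash TABLE to FILE, labelling the output with NAME.  */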
1878 static void
1879 dump_hash_table (FILE *file, const char *name, struct hash_table *table)
1881 int i;
1882 /* Flattened out table, so it's printed in proper order. */
1883 struct expr **flat_table;
1884 unsigned int *hash_val;
1885 struct expr *expr;
1887 flat_table = xcalloc (table->n_elems, sizeof (struct expr *));
1888 hash_val = xmalloc (table->n_elems * sizeof (unsigned int));
1890 for (i = 0; i < (int) table->size; i++)
1891 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1893 flat_table[expr->bitmap_index] = expr;
1894 hash_val[expr->bitmap_index] = i;
1897 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1898 name, table->size, table->n_elems);
1900 for (i = 0; i < (int) table->n_elems; i++)
1901 if (flat_table[i] != 0)
1903 expr = flat_table[i];
1904 fprintf (file, "Index %d (hash value %d)\n ",
1905 expr->bitmap_index, hash_val[i]);
1906 print_rtl (file, expr->expr);
1907 fprintf (file, "\n");
1910 fprintf (file, "\n");
1912 free (flat_table);
1913 free (hash_val);
1916 /* Record register first/last/block set information for REGNO in INSN.
1918 first_set records the first place in the block where the register
1919 is set and is used to compute "anticipatability".
1921 last_set records the last place in the block where the register
1922 is set and is used to compute "availability".
1924 last_bb records the block for which first_set and last_set are
1925 valid, as a quick test to invalidate them.
1927 reg_set_in_block records whether the register is set in the block
1928 and is used to compute "transparency". */
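/* For instance, if REGNO is set by the insns with CUIDs 12 and 30 in the
   current block, first_set records 12 and last_set records 30; entering a
   different block invalidates both via the last_bb check below.  */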
1930 static void
1931 record_last_reg_set_info (rtx insn, int regno)
1933 struct reg_avail_info *info = &reg_avail_info[regno];
1934 int cuid = INSN_CUID (insn);
1936 info->last_set = cuid;
1937 if (info->last_bb != current_bb)
1939 info->last_bb = current_bb;
1940 info->first_set = cuid;
1941 SET_BIT (reg_set_in_block[current_bb->index], regno);
1946 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1947 Note we store a pair of elements in the list, so they have to be
1948 taken off pairwise. */
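/* Walking canon_modify_mem_list[bb] from its head, each store thus appears
   as the pair (canonicalized MEM, canonicalized address), which is the order
   in which compute_transp later consumes it.  */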
1950 static void
1951 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx unused1 ATTRIBUTE_UNUSED,
1952 void * v_insn)
1954 rtx dest_addr, insn;
1955 int bb;
1957 while (GET_CODE (dest) == SUBREG
1958 || GET_CODE (dest) == ZERO_EXTRACT
1959 || GET_CODE (dest) == STRICT_LOW_PART)
1960 dest = XEXP (dest, 0);
1962 /* If DEST is not a MEM, then it will not conflict with a load. Note
1963 that function calls are assumed to clobber memory, but are handled
1964 elsewhere. */
1966 if (! MEM_P (dest))
1967 return;
1969 dest_addr = get_addr (XEXP (dest, 0));
1970 dest_addr = canon_rtx (dest_addr);
1971 insn = (rtx) v_insn;
1972 bb = BLOCK_NUM (insn);
1974 canon_modify_mem_list[bb] =
1975 alloc_EXPR_LIST (VOIDmode, dest_addr, canon_modify_mem_list[bb]);
1976 canon_modify_mem_list[bb] =
1977 alloc_EXPR_LIST (VOIDmode, dest, canon_modify_mem_list[bb]);
1980 /* Record memory modification information for INSN. We do not actually care
1981 about the memory location(s) that are set, or even how they are set (consider
1982 a CALL_INSN). We merely need to record which insns modify memory. */
1984 static void
1985 record_last_mem_set_info (rtx insn)
1987 int bb = BLOCK_NUM (insn);
1989 /* load_killed_in_block_p will handle the case of calls clobbering
1990 everything. */
1991 modify_mem_list[bb] = alloc_INSN_LIST (insn, modify_mem_list[bb]);
1992 bitmap_set_bit (modify_mem_list_set, bb);
1994 if (CALL_P (insn))
1996 /* Note that traversals of this loop (other than for free-ing)
1997 will break after encountering a CALL_INSN. So, there's no
1998 need to insert a pair of items, as canon_list_insert does. */
1999 canon_modify_mem_list[bb] =
2000 alloc_INSN_LIST (insn, canon_modify_mem_list[bb]);
2001 bitmap_set_bit (blocks_with_calls, bb);
2003 else
2004 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
2007 /* Called from compute_hash_table via note_stores to handle one
2008 SET or CLOBBER in an insn. DATA is really the instruction in which
2009 the SET is taking place. */
2011 static void
2012 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
2014 rtx last_set_insn = (rtx) data;
2016 if (GET_CODE (dest) == SUBREG)
2017 dest = SUBREG_REG (dest);
2019 if (REG_P (dest))
2020 record_last_reg_set_info (last_set_insn, REGNO (dest));
2021 else if (MEM_P (dest)
2022 /* Ignore pushes, they clobber nothing. */
2023 && ! push_operand (dest, GET_MODE (dest)))
2024 record_last_mem_set_info (last_set_insn);
2027 /* Top level function to create an expression or assignment hash table.
2029 Expression entries are placed in the hash table if
2030 - they are of the form (set (pseudo-reg) src),
2031 - src is something we want to perform GCSE on,
2032 - none of the operands are subsequently modified in the block
2034 Assignment entries are placed in the hash table if
2035 - they are of the form (set (pseudo-reg) src),
2036 - src is something we want to perform const/copy propagation on,
2037 - none of the operands or target are subsequently modified in the block
2039 Currently src must be a pseudo-reg or a const_int.
2041 TABLE is the table computed. */
2043 static void
2044 compute_hash_table_work (struct hash_table *table)
2046 unsigned int i;
2048 /* While we compute the hash table we also compute a bit array of which
2049 registers are set in which blocks.
2050 ??? This isn't needed during const/copy propagation, but it's cheap to
2051 compute. Later. */
2052 sbitmap_vector_zero (reg_set_in_block, last_basic_block);
2054 /* Re-cache any INSN_LIST nodes we have allocated. */
2055 clear_modify_mem_tables ();
2056 /* Some working arrays used to track first and last set in each block. */
2057 reg_avail_info = gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));
2059 for (i = 0; i < max_gcse_regno; ++i)
2060 reg_avail_info[i].last_bb = NULL;
2062 FOR_EACH_BB (current_bb)
2064 rtx insn;
2065 unsigned int regno;
2066 int in_libcall_block;
2068 /* First pass over the instructions records information used to
2069 determine when registers and memory are first and last set.
2070 ??? hard-reg reg_set_in_block computation
2071 could be moved to compute_sets since they currently don't change. */
2073 FOR_BB_INSNS (current_bb, insn)
2075 if (! INSN_P (insn))
2076 continue;
2078 if (CALL_P (insn))
2080 HARD_REG_SET clobbered_regs;
2082 get_call_invalidated_used_regs (insn, &clobbered_regs, true);
2083 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2084 if (TEST_HARD_REG_BIT (clobbered_regs, regno))
2085 record_last_reg_set_info (insn, regno);
2087 mark_call (insn);
2090 note_stores (PATTERN (insn), record_last_set_info, insn);
2093 /* Insert implicit sets in the hash table. */
2094 if (table->set_p
2095 && implicit_sets[current_bb->index] != NULL_RTX)
2096 hash_scan_set (implicit_sets[current_bb->index],
2097 BB_HEAD (current_bb), table);
2099 /* The next pass builds the hash table. */
2100 in_libcall_block = 0;
2101 FOR_BB_INSNS (current_bb, insn)
2102 if (INSN_P (insn))
2104 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2105 in_libcall_block = 1;
2106 else if (table->set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
2107 in_libcall_block = 0;
2108 hash_scan_insn (insn, table, in_libcall_block);
2109 if (!table->set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
2110 in_libcall_block = 0;
2114 free (reg_avail_info);
2115 reg_avail_info = NULL;
2118 /* Allocate space for the set/expr hash TABLE.
2119 N_INSNS is the number of instructions in the function.
2120 It is used to determine the number of buckets to use.
2121 SET_P determines whether set or expression table will
2122 be created. */
2124 static void
2125 alloc_hash_table (int n_insns, struct hash_table *table, int set_p)
2127 int n;
2129 table->size = n_insns / 4;
2130 if (table->size < 11)
2131 table->size = 11;
2133 /* Attempt to maintain efficient use of hash table.
2134 Making it an odd number is simplest for now.
2135 ??? Later take some measurements. */
2136 table->size |= 1;
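/* For example, roughly 1000 insns give 1000/4 = 250 buckets, forced odd
   to 251; a very small function gets the 11-bucket minimum.  */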
2137 n = table->size * sizeof (struct expr *);
2138 table->table = gmalloc (n);
2139 table->set_p = set_p;
2142 /* Free things allocated by alloc_hash_table. */
2144 static void
2145 free_hash_table (struct hash_table *table)
2147 free (table->table);
2150 /* Compute the hash TABLE for doing copy/const propagation or
2151 expression hash table. */
2153 static void
2154 compute_hash_table (struct hash_table *table)
2156 /* Initialize count of number of entries in hash table. */
2157 table->n_elems = 0;
2158 memset (table->table, 0, table->size * sizeof (struct expr *));
2160 compute_hash_table_work (table);
2163 /* Expression tracking support. */
2165 /* Lookup REGNO in the set TABLE. The result is a pointer to the
2166 table entry, or NULL if not found. */
2168 static struct expr *
2169 lookup_set (unsigned int regno, struct hash_table *table)
2171 unsigned int hash = hash_set (regno, table->size);
2172 struct expr *expr;
2174 expr = table->table[hash];
2176 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2177 expr = expr->next_same_hash;
2179 return expr;
2182 /* Return the next entry for REGNO in list EXPR. */
2184 static struct expr *
2185 next_set (unsigned int regno, struct expr *expr)
2187 do
2188 expr = expr->next_same_hash;
2189 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2191 return expr;
2194 /* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
2195 types may be mixed. */
2197 static void
2198 free_insn_expr_list_list (rtx *listp)
2200 rtx list, next;
2202 for (list = *listp; list ; list = next)
2204 next = XEXP (list, 1);
2205 if (GET_CODE (list) == EXPR_LIST)
2206 free_EXPR_LIST_node (list);
2207 else
2208 free_INSN_LIST_node (list);
2211 *listp = NULL;
2214 /* Clear canon_modify_mem_list and modify_mem_list tables. */
2215 static void
2216 clear_modify_mem_tables (void)
2218 unsigned i;
2219 bitmap_iterator bi;
2221 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
2223 free_INSN_LIST_list (modify_mem_list + i);
2224 free_insn_expr_list_list (canon_modify_mem_list + i);
2226 bitmap_clear (modify_mem_list_set);
2227 bitmap_clear (blocks_with_calls);
2230 /* Release memory used by modify_mem_list_set. */
2232 static void
2233 free_modify_mem_tables (void)
2235 clear_modify_mem_tables ();
2236 free (modify_mem_list);
2237 free (canon_modify_mem_list);
2238 modify_mem_list = 0;
2239 canon_modify_mem_list = 0;
2242 /* Reset tables used to keep track of what's still available [since the
2243 start of the block]. */
2245 static void
2246 reset_opr_set_tables (void)
2248 /* Maintain a bitmap of which regs have been set since beginning of
2249 the block. */
2250 CLEAR_REG_SET (reg_set_bitmap);
2252 /* Also keep a record of the last instruction to modify memory.
2253 For now this is very trivial, we only record whether any memory
2254 location has been modified. */
2255 clear_modify_mem_tables ();
2258 /* Return nonzero if the operands of X are not set before INSN in
2259 INSN's basic block. */
2261 static int
2262 oprs_not_set_p (const_rtx x, const_rtx insn)
2264 int i, j;
2265 enum rtx_code code;
2266 const char *fmt;
2268 if (x == 0)
2269 return 1;
2271 code = GET_CODE (x);
2272 switch (code)
2274 case PC:
2275 case CC0:
2276 case CONST:
2277 case CONST_INT:
2278 case CONST_DOUBLE:
2279 case CONST_FIXED:
2280 case CONST_VECTOR:
2281 case SYMBOL_REF:
2282 case LABEL_REF:
2283 case ADDR_VEC:
2284 case ADDR_DIFF_VEC:
2285 return 1;
2287 case MEM:
2288 if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
2289 INSN_CUID (insn), x, 0))
2290 return 0;
2291 else
2292 return oprs_not_set_p (XEXP (x, 0), insn);
2294 case REG:
2295 return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x));
2297 default:
2298 break;
2301 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2303 if (fmt[i] == 'e')
2305 /* If we are about to do the last recursive call
2306 needed at this level, change it into iteration.
2307 This function is called enough to be worth it. */
2308 if (i == 0)
2309 return oprs_not_set_p (XEXP (x, i), insn);
2311 if (! oprs_not_set_p (XEXP (x, i), insn))
2312 return 0;
2314 else if (fmt[i] == 'E')
2315 for (j = 0; j < XVECLEN (x, i); j++)
2316 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2317 return 0;
2320 return 1;
2323 /* Mark things set by a CALL. */
2325 static void
2326 mark_call (rtx insn)
2328 if (! RTL_CONST_OR_PURE_CALL_P (insn))
2329 record_last_mem_set_info (insn);
2332 /* Mark things set by a SET. */
2334 static void
2335 mark_set (rtx pat, rtx insn)
2337 rtx dest = SET_DEST (pat);
2339 while (GET_CODE (dest) == SUBREG
2340 || GET_CODE (dest) == ZERO_EXTRACT
2341 || GET_CODE (dest) == STRICT_LOW_PART)
2342 dest = XEXP (dest, 0);
2344 if (REG_P (dest))
2345 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest));
2346 else if (MEM_P (dest))
2347 record_last_mem_set_info (insn);
2349 if (GET_CODE (SET_SRC (pat)) == CALL)
2350 mark_call (insn);
2353 /* Record things set by a CLOBBER. */
2355 static void
2356 mark_clobber (rtx pat, rtx insn)
2358 rtx clob = XEXP (pat, 0);
2360 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2361 clob = XEXP (clob, 0);
2363 if (REG_P (clob))
2364 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob));
2365 else
2366 record_last_mem_set_info (insn);
2369 /* Record things set by INSN.
2370 This data is used by oprs_not_set_p. */
2372 static void
2373 mark_oprs_set (rtx insn)
2375 rtx pat = PATTERN (insn);
2376 int i;
2378 if (GET_CODE (pat) == SET)
2379 mark_set (pat, insn);
2380 else if (GET_CODE (pat) == PARALLEL)
2381 for (i = 0; i < XVECLEN (pat, 0); i++)
2383 rtx x = XVECEXP (pat, 0, i);
2385 if (GET_CODE (x) == SET)
2386 mark_set (x, insn);
2387 else if (GET_CODE (x) == CLOBBER)
2388 mark_clobber (x, insn);
2389 else if (GET_CODE (x) == CALL)
2390 mark_call (insn);
2393 else if (GET_CODE (pat) == CLOBBER)
2394 mark_clobber (pat, insn);
2395 else if (GET_CODE (pat) == CALL)
2396 mark_call (insn);
2400 /* Compute copy/constant propagation working variables. */
2402 /* Local properties of assignments. */
2403 static sbitmap *cprop_pavloc;
2404 static sbitmap *cprop_absaltered;
2406 /* Global properties of assignments (computed from the local properties). */
2407 static sbitmap *cprop_avin;
2408 static sbitmap *cprop_avout;
2410 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
2411 basic blocks. N_SETS is the number of sets. */
2413 static void
2414 alloc_cprop_mem (int n_blocks, int n_sets)
2416 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
2417 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
2419 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
2420 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
2423 /* Free vars used by copy/const propagation. */
2425 static void
2426 free_cprop_mem (void)
2428 sbitmap_vector_free (cprop_pavloc);
2429 sbitmap_vector_free (cprop_absaltered);
2430 sbitmap_vector_free (cprop_avin);
2431 sbitmap_vector_free (cprop_avout);
2434 /* For each block, compute whether X is transparent. X is either an
2435 expression or an assignment [though we don't care which, for this context
2436 an assignment is treated as an expression]. For each block where an
2437 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
2438 bit in BMAP. */
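/* As an illustration, with SET_P == 0 and X == (plus (reg 65) (mem Y)),
   the INDX bit is cleared in every block that sets reg 65 and in every
   block containing a call or a store that may alias the MEM.  */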
2440 static void
2441 compute_transp (const_rtx x, int indx, sbitmap *bmap, int set_p)
2443 int i, j;
2444 basic_block bb;
2445 enum rtx_code code;
2446 reg_set *r;
2447 const char *fmt;
2449 /* repeat is used to turn tail-recursion into iteration since GCC
2450 can't do it when there's no return value. */
2451 repeat:
2453 if (x == 0)
2454 return;
2456 code = GET_CODE (x);
2457 switch (code)
2459 case REG:
2460 if (set_p)
2462 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
2464 FOR_EACH_BB (bb)
2465 if (TEST_BIT (reg_set_in_block[bb->index], REGNO (x)))
2466 SET_BIT (bmap[bb->index], indx);
2468 else
2470 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
2471 SET_BIT (bmap[r->bb_index], indx);
2474 else
2476 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
2478 FOR_EACH_BB (bb)
2479 if (TEST_BIT (reg_set_in_block[bb->index], REGNO (x)))
2480 RESET_BIT (bmap[bb->index], indx);
2482 else
2484 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
2485 RESET_BIT (bmap[r->bb_index], indx);
2489 return;
2491 case MEM:
2492 if (! MEM_READONLY_P (x))
2494 bitmap_iterator bi;
2495 unsigned bb_index;
2497 /* First handle all the blocks with calls. We don't need to
2498 do any list walking for them. */
2499 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
2501 if (set_p)
2502 SET_BIT (bmap[bb_index], indx);
2503 else
2504 RESET_BIT (bmap[bb_index], indx);
2507 /* Now iterate over the blocks which have memory modifications
2508 but which do not have any calls. */
2509 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
2510 blocks_with_calls,
2511 0, bb_index, bi)
2513 rtx list_entry = canon_modify_mem_list[bb_index];
2515 while (list_entry)
2517 rtx dest, dest_addr;
2519 /* LIST_ENTRY must be an INSN of some kind that sets memory.
2520 Examine each hunk of memory that is modified. */
2522 dest = XEXP (list_entry, 0);
2523 list_entry = XEXP (list_entry, 1);
2524 dest_addr = XEXP (list_entry, 0);
2526 if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
2527 x, rtx_addr_varies_p))
2529 if (set_p)
2530 SET_BIT (bmap[bb_index], indx);
2531 else
2532 RESET_BIT (bmap[bb_index], indx);
2533 break;
2535 list_entry = XEXP (list_entry, 1);
2540 x = XEXP (x, 0);
2541 goto repeat;
2543 case PC:
2544 case CC0: /*FIXME*/
2545 case CONST:
2546 case CONST_INT:
2547 case CONST_DOUBLE:
2548 case CONST_FIXED:
2549 case CONST_VECTOR:
2550 case SYMBOL_REF:
2551 case LABEL_REF:
2552 case ADDR_VEC:
2553 case ADDR_DIFF_VEC:
2554 return;
2556 default:
2557 break;
2560 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2562 if (fmt[i] == 'e')
2564 /* If we are about to do the last recursive call
2565 needed at this level, change it into iteration.
2566 This function is called enough to be worth it. */
2567 if (i == 0)
2569 x = XEXP (x, i);
2570 goto repeat;
2573 compute_transp (XEXP (x, i), indx, bmap, set_p);
2575 else if (fmt[i] == 'E')
2576 for (j = 0; j < XVECLEN (x, i); j++)
2577 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
2581 /* Top level routine to do the dataflow analysis needed by copy/const
2582 propagation. */
2584 static void
2585 compute_cprop_data (void)
2587 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, &set_hash_table);
2588 compute_available (cprop_pavloc, cprop_absaltered,
2589 cprop_avout, cprop_avin);
2592 /* Copy/constant propagation. */
2594 /* Maximum number of register uses in an insn that we handle. */
2595 #define MAX_USES 8
2597 /* Table of uses found in an insn.
2598 Allocated statically to avoid alloc/free complexity and overhead. */
2599 static struct reg_use reg_use_table[MAX_USES];
2601 /* Index into `reg_use_table' while building it. */
2602 static int reg_use_count;
2604 /* Set up a list of register numbers used in INSN. The found uses are stored
2605 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
2606 and contains the number of uses in the table upon exit.
2608 ??? If a register appears multiple times we will record it multiple times.
2609 This doesn't hurt anything but it will slow things down. */
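/* E.g. scanning (plus (reg 65) (reg 66)) records both pseudos in
   reg_use_table and leaves reg_use_count at 2, assuming it started at 0.  */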
2611 static void
2612 find_used_regs (rtx *xptr, void *data ATTRIBUTE_UNUSED)
2614 int i, j;
2615 enum rtx_code code;
2616 const char *fmt;
2617 rtx x = *xptr;
2619 /* repeat is used to turn tail-recursion into iteration since GCC
2620 can't do it when there's no return value. */
2621 repeat:
2622 if (x == 0)
2623 return;
2625 code = GET_CODE (x);
2626 if (REG_P (x))
2628 if (reg_use_count == MAX_USES)
2629 return;
2631 reg_use_table[reg_use_count].reg_rtx = x;
2632 reg_use_count++;
2635 /* Recursively scan the operands of this expression. */
2637 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2639 if (fmt[i] == 'e')
2641 /* If we are about to do the last recursive call
2642 needed at this level, change it into iteration.
2643 This function is called enough to be worth it. */
2644 if (i == 0)
2646 x = XEXP (x, 0);
2647 goto repeat;
2650 find_used_regs (&XEXP (x, i), data);
2652 else if (fmt[i] == 'E')
2653 for (j = 0; j < XVECLEN (x, i); j++)
2654 find_used_regs (&XVECEXP (x, i, j), data);
2658 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
2659 Returns nonzero if successful. */
2661 static int
2662 try_replace_reg (rtx from, rtx to, rtx insn)
2664 rtx note = find_reg_equal_equiv_note (insn);
2665 rtx src = 0;
2666 int success = 0;
2667 rtx set = single_set (insn);
2669 /* Usually we substitute easy stuff, so we won't copy everything.
2670 However, we need to take care not to duplicate non-trivial CONST
2671 expressions. */
2672 to = copy_rtx (to);
2674 validate_replace_src_group (from, to, insn);
2675 if (num_changes_pending () && apply_change_group ())
2676 success = 1;
2678 /* Try to simplify SET_SRC if we have substituted a constant. */
2679 if (success && set && CONSTANT_P (to))
2681 src = simplify_rtx (SET_SRC (set));
2683 if (src)
2684 validate_change (insn, &SET_SRC (set), src, 0);
2687 /* If there is already a REG_EQUAL note, update the expression in it
2688 with our replacement. */
2689 if (note != 0 && REG_NOTE_KIND (note) == REG_EQUAL)
2690 set_unique_reg_note (insn, REG_EQUAL,
2691 simplify_replace_rtx (XEXP (note, 0), from,
2692 copy_rtx (to)));
2693 if (!success && set && reg_mentioned_p (from, SET_SRC (set)))
2695 /* If the above failed and this is a single set, try to simplify the source of
2696 the set given our substitution. We could perhaps try this for multiple
2697 SETs, but it probably won't buy us anything. */
2698 src = simplify_replace_rtx (SET_SRC (set), from, to);
2700 if (!rtx_equal_p (src, SET_SRC (set))
2701 && validate_change (insn, &SET_SRC (set), src, 0))
2702 success = 1;
2704 /* If we've failed to do replacement, have a single SET, don't already
2705 have a note, and have no special SET, add a REG_EQUAL note to not
2706 lose information. */
2707 if (!success && note == 0 && set != 0
2708 && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
2709 && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
2710 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
2713 /* REG_EQUAL may get simplified into a register.
2714 We don't allow that. Remove that note. This code ought
2715 not to happen, because the previous code ought to synthesize a
2716 reg-reg move, but be on the safe side. */
2717 if (note && REG_NOTE_KIND (note) == REG_EQUAL && REG_P (XEXP (note, 0)))
2718 remove_note (insn, note);
2720 return success;
2723 /* Find a set of REGNO that is available on entry to INSN's block. Returns
2724 NULL if no such set is found. */
2726 static struct expr *
2727 find_avail_set (int regno, rtx insn)
2729 /* SET1 contains the last set found that can be returned to the caller for
2730 use in a substitution. */
2731 struct expr *set1 = 0;
2733 /* Loops are not possible here. To get a loop we would need two sets
2734 available at the start of the block containing INSN. i.e. we would
2735 need two sets like this available at the start of the block:
2737 (set (reg X) (reg Y))
2738 (set (reg Y) (reg X))
2740 This can not happen since the set of (reg Y) would have killed the
2741 set of (reg X) making it unavailable at the start of this block. */
2742 while (1)
2744 rtx src;
2745 struct expr *set = lookup_set (regno, &set_hash_table);
2747 /* Find a set that is available at the start of the block
2748 which contains INSN. */
2749 while (set)
2751 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
2752 break;
2753 set = next_set (regno, set);
2756 /* If no available set was found we've reached the end of the
2757 (possibly empty) copy chain. */
2758 if (set == 0)
2759 break;
2761 gcc_assert (GET_CODE (set->expr) == SET);
2763 src = SET_SRC (set->expr);
2765 /* We know the set is available.
2766 Now check that SRC is ANTLOC (i.e. none of the source operands
2767 have changed since the start of the block).
2769 If the source operand changed, we may still use it for the next
2770 iteration of this loop, but we may not use it for substitutions. */
2772 if (gcse_constant_p (src) || oprs_not_set_p (src, insn))
2773 set1 = set;
2775 /* If the source of the set is anything except a register, then
2776 we have reached the end of the copy chain. */
2777 if (! REG_P (src))
2778 break;
2780 /* Follow the copy chain, i.e. start another iteration of the loop
2781 and see if we have an available copy into SRC. */
2782 regno = REGNO (src);
2785 /* SET1 holds the last set that was available and anticipatable at
2786 INSN. */
2787 return set1;
2790 /* Subroutine of cprop_insn that tries to propagate constants into
2791 JUMP_INSNS. JUMP must be a conditional jump. If SETCC is non-NULL
2792 it is the instruction that immediately precedes JUMP, and must be a
2793 single SET of a register. FROM is what we will try to replace,
2794 SRC is the constant we will try to substitute for it. Returns nonzero
2795 if a change was made. */
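/* Illustrative scenario: SETCC is (set (reg 100) (gt (reg 65) (const_int 10)))
   and JUMP branches on (ne (reg 100) (const_int 0)).  Propagating a known
   (const_int 20) for reg 65 lets the combined condition fold to a constant,
   after which the conditional jump is either turned into an unconditional
   jump or deleted below.  */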
2797 static int
2798 cprop_jump (basic_block bb, rtx setcc, rtx jump, rtx from, rtx src)
2800 rtx new, set_src, note_src;
2801 rtx set = pc_set (jump);
2802 rtx note = find_reg_equal_equiv_note (jump);
2804 if (note)
2806 note_src = XEXP (note, 0);
2807 if (GET_CODE (note_src) == EXPR_LIST)
2808 note_src = NULL_RTX;
2810 else note_src = NULL_RTX;
2812 /* Prefer REG_EQUAL notes except those containing EXPR_LISTs. */
2813 set_src = note_src ? note_src : SET_SRC (set);
2815 /* First substitute the SETCC condition into the JUMP instruction,
2816 then substitute that given values into this expanded JUMP. */
2817 if (setcc != NULL_RTX
2818 && !modified_between_p (from, setcc, jump)
2819 && !modified_between_p (src, setcc, jump))
2821 rtx setcc_src;
2822 rtx setcc_set = single_set (setcc);
2823 rtx setcc_note = find_reg_equal_equiv_note (setcc);
2824 setcc_src = (setcc_note && GET_CODE (XEXP (setcc_note, 0)) != EXPR_LIST)
2825 ? XEXP (setcc_note, 0) : SET_SRC (setcc_set);
2826 set_src = simplify_replace_rtx (set_src, SET_DEST (setcc_set),
2827 setcc_src);
2829 else
2830 setcc = NULL_RTX;
2832 new = simplify_replace_rtx (set_src, from, src);
2834 /* If no simplification can be made, then try the next register. */
2835 if (rtx_equal_p (new, SET_SRC (set)))
2836 return 0;
2838 /* If this is now a no-op delete it, otherwise this must be a valid insn. */
2839 if (new == pc_rtx)
2840 delete_insn (jump);
2841 else
2843 /* Ensure the value computed inside the jump insn is equivalent
2844 to the one computed by setcc. */
2845 if (setcc && modified_in_p (new, setcc))
2846 return 0;
2847 if (! validate_unshare_change (jump, &SET_SRC (set), new, 0))
2849 /* When (some) constants are not valid in a comparison, and there
2850 are two registers to be replaced by constants before the entire
2851 comparison can be folded into a constant, we need to keep
2852 intermediate information in REG_EQUAL notes. For targets with
2853 separate compare insns, such notes are added by try_replace_reg.
2854 When we have a combined compare-and-branch instruction, however,
2855 we need to attach a note to the branch itself to make this
2856 optimization work. */
2858 if (!rtx_equal_p (new, note_src))
2859 set_unique_reg_note (jump, REG_EQUAL, copy_rtx (new));
2860 return 0;
2863 /* Remove REG_EQUAL note after simplification. */
2864 if (note_src)
2865 remove_note (jump, note);
2868 #ifdef HAVE_cc0
2869 /* Delete the cc0 setter. */
2870 if (setcc != NULL && CC0_P (SET_DEST (single_set (setcc))))
2871 delete_insn (setcc);
2872 #endif
2874 run_jump_opt_after_gcse = 1;
2876 global_const_prop_count++;
2877 if (dump_file != NULL)
2879 fprintf (dump_file,
2880 "GLOBAL CONST-PROP: Replacing reg %d in jump_insn %d with constant ",
2881 REGNO (from), INSN_UID (jump));
2882 print_rtl (dump_file, src);
2883 fprintf (dump_file, "\n");
2885 purge_dead_edges (bb);
2887 /* If a conditional jump has been changed into an unconditional jump, remove
2888 the jump and make the edge fallthru - this is always called in
2889 cfglayout mode. */
2890 if (new != pc_rtx && simplejump_p (jump))
2892 edge e;
2893 edge_iterator ei;
2895 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ei_next (&ei))
2896 if (e->dest != EXIT_BLOCK_PTR
2897 && BB_HEAD (e->dest) == JUMP_LABEL (jump))
2899 e->flags |= EDGE_FALLTHRU;
2900 break;
2902 delete_insn (jump);
2905 return 1;
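/* Try to substitute TO for FROM in INSN.  If ALTER_JUMPS is set, also try
   to propagate into a conditional jump that immediately follows a reg or
   cc0 setter, or into INSN itself when it is such a jump.  Returns nonzero
   if a change was made.  */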
2908 static bool
2909 constprop_register (rtx insn, rtx from, rtx to, bool alter_jumps)
2911 rtx sset;
2913 /* Check for reg or cc0 setting instructions followed by
2914 conditional branch instructions first. */
2915 if (alter_jumps
2916 && (sset = single_set (insn)) != NULL
2917 && NEXT_INSN (insn)
2918 && any_condjump_p (NEXT_INSN (insn)) && onlyjump_p (NEXT_INSN (insn)))
2920 rtx dest = SET_DEST (sset);
2921 if ((REG_P (dest) || CC0_P (dest))
2922 && cprop_jump (BLOCK_FOR_INSN (insn), insn, NEXT_INSN (insn), from, to))
2923 return 1;
2926 /* Handle normal insns next. */
2927 if (NONJUMP_INSN_P (insn)
2928 && try_replace_reg (from, to, insn))
2929 return 1;
2931 /* Try to propagate a CONST_INT into a conditional jump.
2932 We're pretty specific about what we will handle in this
2933 code; we can extend this as necessary over time.
2935 Right now the insn in question must look like
2936 (set (pc) (if_then_else ...)) */
2937 else if (alter_jumps && any_condjump_p (insn) && onlyjump_p (insn))
2938 return cprop_jump (BLOCK_FOR_INSN (insn), NULL, insn, from, to);
2939 return 0;
2942 /* Perform constant and copy propagation on INSN.
2943 The result is nonzero if a change was made. */
2945 static int
2946 cprop_insn (rtx insn, int alter_jumps)
2948 struct reg_use *reg_used;
2949 int changed = 0;
2950 rtx note;
2952 if (!INSN_P (insn))
2953 return 0;
2955 reg_use_count = 0;
2956 note_uses (&PATTERN (insn), find_used_regs, NULL);
2958 note = find_reg_equal_equiv_note (insn);
2960 /* We may win even when propagating constants into notes. */
2961 if (note)
2962 find_used_regs (&XEXP (note, 0), NULL);
2964 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
2965 reg_used++, reg_use_count--)
2967 unsigned int regno = REGNO (reg_used->reg_rtx);
2968 rtx pat, src;
2969 struct expr *set;
2971 /* Ignore registers created by GCSE.
2972 We do this because ... */
2973 if (regno >= max_gcse_regno)
2974 continue;
2976 /* If the register has already been set in this block, there's
2977 nothing we can do. */
2978 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
2979 continue;
2981 /* Find an assignment that sets reg_used and is available
2982 at the start of the block. */
2983 set = find_avail_set (regno, insn);
2984 if (! set)
2985 continue;
2987 pat = set->expr;
2988 /* ??? We might be able to handle PARALLELs. Later. */
2989 gcc_assert (GET_CODE (pat) == SET);
2991 src = SET_SRC (pat);
2993 /* Constant propagation. */
2994 if (gcse_constant_p (src))
2996 if (constprop_register (insn, reg_used->reg_rtx, src, alter_jumps))
2998 changed = 1;
2999 global_const_prop_count++;
3000 if (dump_file != NULL)
3002 fprintf (dump_file, "GLOBAL CONST-PROP: Replacing reg %d in ", regno);
3003 fprintf (dump_file, "insn %d with constant ", INSN_UID (insn));
3004 print_rtl (dump_file, src);
3005 fprintf (dump_file, "\n");
3007 if (INSN_DELETED_P (insn))
3008 return 1;
3011 else if (REG_P (src)
3012 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3013 && REGNO (src) != regno)
3015 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3017 changed = 1;
3018 global_copy_prop_count++;
3019 if (dump_file != NULL)
3021 fprintf (dump_file, "GLOBAL COPY-PROP: Replacing reg %d in insn %d",
3022 regno, INSN_UID (insn));
3023 fprintf (dump_file, " with reg %d\n", REGNO (src));
3026 /* The original insn setting reg_used may or may not now be
3027 deletable. We leave the deletion to flow. */
3028 /* FIXME: If it turns out that the insn isn't deletable,
3029 then we may have unnecessarily extended register lifetimes
3030 and made things worse. */
3035 return changed;
3038 /* Like find_used_regs, but avoid recording uses that appear in
3039 input-output contexts such as zero_extract or pre_dec. This
3040 restricts the cases we consider to those for which local cprop
3041 can legitimately make replacements. */
3043 static void
3044 local_cprop_find_used_regs (rtx *xptr, void *data)
3046 rtx x = *xptr;
3048 if (x == 0)
3049 return;
3051 switch (GET_CODE (x))
3053 case ZERO_EXTRACT:
3054 case SIGN_EXTRACT:
3055 case STRICT_LOW_PART:
3056 return;
3058 case PRE_DEC:
3059 case PRE_INC:
3060 case POST_DEC:
3061 case POST_INC:
3062 case PRE_MODIFY:
3063 case POST_MODIFY:
3064 /* Can only legitimately appear this early in the context of
3065 stack pushes for function arguments, but handle all of the
3066 codes nonetheless. */
3067 return;
3069 case SUBREG:
3070 /* Setting a subreg of a register larger than word_mode leaves
3071 the non-written words unchanged. */
3072 if (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))) > BITS_PER_WORD)
3073 return;
3074 break;
3076 default:
3077 break;
3080 find_used_regs (xptr, data);
3083 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
3084 their REG_EQUAL notes need updating. */
3086 static bool
3087 do_local_cprop (rtx x, rtx insn, bool alter_jumps, rtx *libcall_sp)
3089 rtx newreg = NULL, newcnst = NULL;
3091 /* Rule out USE instructions and ASM statements as we don't want to
3092 change the hard registers mentioned. */
3093 if (REG_P (x)
3094 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
3095 || (GET_CODE (PATTERN (insn)) != USE
3096 && asm_noperands (PATTERN (insn)) < 0)))
3098 cselib_val *val = cselib_lookup (x, GET_MODE (x), 0);
3099 struct elt_loc_list *l;
3101 if (!val)
3102 return false;
3103 for (l = val->locs; l; l = l->next)
3105 rtx this_rtx = l->loc;
3106 rtx note;
3108 /* Don't CSE non-constant values out of libcall blocks. */
3109 if (l->in_libcall && ! CONSTANT_P (this_rtx))
3110 continue;
3112 if (gcse_constant_p (this_rtx))
3113 newcnst = this_rtx;
3114 if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER
3115 /* Don't copy propagate if it has an attached REG_EQUIV note.
3116 At this point, only function parameters should have
3117 REG_EQUIV notes, and if the argument slot is used somewhere
3118 explicitly, it means the address of the parameter has been taken,
3119 so we should not extend the lifetime of the pseudo. */
3120 && (!(note = find_reg_note (l->setting_insn, REG_EQUIV, NULL_RTX))
3121 || ! MEM_P (XEXP (note, 0))))
3122 newreg = this_rtx;
3124 if (newcnst && constprop_register (insn, x, newcnst, alter_jumps))
3126 /* If we find a case where we can't fix the retval REG_EQUAL notes
3127 to match the new register, we either have to abandon this replacement
3128 or fix delete_trivially_dead_insns to preserve the setting insn,
3129 or make it delete the REG_EQUAL note, and fix up all passes that
3130 require the REG_EQUAL note there. */
3131 bool adjusted;
3133 adjusted = adjust_libcall_notes (x, newcnst, insn, libcall_sp);
3134 gcc_assert (adjusted);
3136 if (dump_file != NULL)
3138 fprintf (dump_file, "LOCAL CONST-PROP: Replacing reg %d in ",
3139 REGNO (x));
3140 fprintf (dump_file, "insn %d with constant ",
3141 INSN_UID (insn));
3142 print_rtl (dump_file, newcnst);
3143 fprintf (dump_file, "\n");
3145 local_const_prop_count++;
3146 return true;
3148 else if (newreg && newreg != x && try_replace_reg (x, newreg, insn))
3150 adjust_libcall_notes (x, newreg, insn, libcall_sp);
3151 if (dump_file != NULL)
3153 fprintf (dump_file,
3154 "LOCAL COPY-PROP: Replacing reg %d in insn %d",
3155 REGNO (x), INSN_UID (insn));
3156 fprintf (dump_file, " with reg %d\n", REGNO (newreg));
3158 local_copy_prop_count++;
3159 return true;
3162 return false;
3165 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
3166 their REG_EQUAL notes need updating to reflect that OLDREG has been
3167 replaced with NEWVAL in INSN. Return true if all substitutions could
3168 be made. */
3169 static bool
3170 adjust_libcall_notes (rtx oldreg, rtx newval, rtx insn, rtx *libcall_sp)
3172 rtx end;
3174 while ((end = *libcall_sp++))
3176 rtx note = find_reg_equal_equiv_note (end);
3178 if (! note)
3179 continue;
3181 if (REG_P (newval))
3183 if (reg_set_between_p (newval, PREV_INSN (insn), end))
3187 note = find_reg_equal_equiv_note (end);
3188 if (! note)
3189 continue;
3190 if (reg_mentioned_p (newval, XEXP (note, 0)))
3191 return false;
3193 while ((end = *libcall_sp++));
3194 return true;
3197 XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), oldreg, newval);
3198 df_notes_rescan (end);
3199 insn = end;
3201 return true;
3204 #define MAX_NESTED_LIBCALLS 9
3206 /* Do local const/copy propagation (i.e. within each basic block).
3207 If ALTER_JUMPS is true, allow propagating into jump insns, which
3208 could modify the CFG. */
3210 static void
3211 local_cprop_pass (bool alter_jumps)
3213 basic_block bb;
3214 rtx insn;
3215 struct reg_use *reg_used;
3216 rtx libcall_stack[MAX_NESTED_LIBCALLS + 1], *libcall_sp;
3217 bool changed = false;
3219 cselib_init (false);
3220 libcall_sp = &libcall_stack[MAX_NESTED_LIBCALLS];
3221 *libcall_sp = 0;
3222 FOR_EACH_BB (bb)
3224 FOR_BB_INSNS (bb, insn)
3226 if (INSN_P (insn))
3228 rtx note = find_reg_note (insn, REG_LIBCALL, NULL_RTX);
3230 if (note)
3232 gcc_assert (libcall_sp != libcall_stack);
3233 *--libcall_sp = XEXP (note, 0);
3235 note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
3236 if (note)
3237 libcall_sp++;
3238 note = find_reg_equal_equiv_note (insn);
3241 reg_use_count = 0;
3242 note_uses (&PATTERN (insn), local_cprop_find_used_regs,
3243 NULL);
3244 if (note)
3245 local_cprop_find_used_regs (&XEXP (note, 0), NULL);
3247 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3248 reg_used++, reg_use_count--)
3250 if (do_local_cprop (reg_used->reg_rtx, insn, alter_jumps,
3251 libcall_sp))
3253 changed = true;
3254 break;
3257 if (INSN_DELETED_P (insn))
3258 break;
3260 while (reg_use_count);
3262 cselib_process_insn (insn);
3265 /* Forget everything at the end of a basic block. Make sure we are
3266 not inside a libcall; they should never cross basic blocks. */
3267 cselib_clear_table ();
3268 gcc_assert (libcall_sp == &libcall_stack[MAX_NESTED_LIBCALLS]);
3271 cselib_finish ();
3273 /* Global analysis may get into infinite loops for unreachable blocks. */
3274 if (changed && alter_jumps)
3276 delete_unreachable_blocks ();
3277 free_reg_set_mem ();
3278 alloc_reg_set_mem (max_reg_num ());
3279 compute_sets ();
3283 /* Forward propagate copies. This includes copies and constants. Return
3284 nonzero if a change was made. */
3286 static int
3287 cprop (int alter_jumps)
3289 int changed;
3290 basic_block bb;
3291 rtx insn;
3293 /* Note we start at block 1. */
3294 if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
3296 if (dump_file != NULL)
3297 fprintf (dump_file, "\n");
3298 return 0;
3301 changed = 0;
3302 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb, EXIT_BLOCK_PTR, next_bb)
3304 /* Reset tables used to keep track of what's still valid [since the
3305 start of the block]. */
3306 reset_opr_set_tables ();
3308 FOR_BB_INSNS (bb, insn)
3309 if (INSN_P (insn))
3311 changed |= cprop_insn (insn, alter_jumps);
3313 /* Keep track of everything modified by this insn. */
3314 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
3315 call mark_oprs_set if we turned the insn into a NOTE. */
3316 if (! NOTE_P (insn))
3317 mark_oprs_set (insn);
3321 if (dump_file != NULL)
3322 fprintf (dump_file, "\n");
3324 return changed;
3327 /* Similar to get_condition, only the resulting condition must be
3328 valid at JUMP, instead of at EARLIEST.
3330 This differs from noce_get_condition in ifcvt.c in that we prefer not to
3331 settle for the condition variable in the jump instruction being integral.
3332 We prefer to be able to record the value of a user variable, rather than
3333 the value of a temporary used in a condition. This could be solved by
3334 recording the value of *every* register scanned by canonicalize_condition,
3335 but this would require some code reorganization. */
3337 static rtx
3338 fis_get_condition (rtx jump)
3340 return get_condition (jump, NULL, false, true);
3343 /* Check the comparison COND to see if we can safely form an implicit set from
3344 it. COND is either an EQ or NE comparison. */
3346 static bool
3347 implicit_set_cond_p (const_rtx cond)
3349 const enum machine_mode mode = GET_MODE (XEXP (cond, 0));
3350 const_rtx cst = XEXP (cond, 1);
3352 /* We can't perform this optimization if either operand might be or might
3353 contain a signed zero. */
3354 if (HONOR_SIGNED_ZEROS (mode))
3356 /* It is sufficient to check if CST is or contains a zero. We must
3357 handle float, complex, and vector. If any subpart is a zero, then
3358 the optimization can't be performed. */
3359 /* ??? The complex and vector checks are not implemented yet. We just
3360 always return zero for them. */
3361 if (GET_CODE (cst) == CONST_DOUBLE)
3363 REAL_VALUE_TYPE d;
3364 REAL_VALUE_FROM_CONST_DOUBLE (d, cst);
3365 if (REAL_VALUES_EQUAL (d, dconst0))
3366 return 0;
3368 else
3369 return 0;
3372 return gcse_constant_p (cst);
3375 /* Find the implicit sets of a function. An "implicit set" is a constraint
3376 on the value of a variable, implied by a conditional jump. For example,
3377 following "if (x == 2)", the then branch may be optimized as though the
3378 conditional performed an "explicit set", in this example, "x = 2". This
3379 function records the set patterns that are implicit at the start of each
3380 basic block. */
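/* In the "if (x == 2)" example above, the block reached only when x == 2
   holds (provided it has a single predecessor) gets (set (reg x)
   (const_int 2)) recorded in implicit_sets for its block index.  */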
3382 static void
3383 find_implicit_sets (void)
3385 basic_block bb, dest;
3386 unsigned int count;
3387 rtx cond, new;
3389 count = 0;
3390 FOR_EACH_BB (bb)
3391 /* Check for more than one successor. */
3392 if (EDGE_COUNT (bb->succs) > 1)
3394 cond = fis_get_condition (BB_END (bb));
3396 if (cond
3397 && (GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
3398 && REG_P (XEXP (cond, 0))
3399 && REGNO (XEXP (cond, 0)) >= FIRST_PSEUDO_REGISTER
3400 && implicit_set_cond_p (cond))
3402 dest = GET_CODE (cond) == EQ ? BRANCH_EDGE (bb)->dest
3403 : FALLTHRU_EDGE (bb)->dest;
3405 if (dest && single_pred_p (dest)
3406 && dest != EXIT_BLOCK_PTR)
3408 new = gen_rtx_SET (VOIDmode, XEXP (cond, 0),
3409 XEXP (cond, 1));
3410 implicit_sets[dest->index] = new;
3411 if (dump_file)
3413 fprintf(dump_file, "Implicit set of reg %d in ",
3414 REGNO (XEXP (cond, 0)));
3415 fprintf(dump_file, "basic block %d\n", dest->index);
3417 count++;
3422 if (dump_file)
3423 fprintf (dump_file, "Found %d implicit sets\n", count);
3426 /* Perform one copy/constant propagation pass.
3427 PASS is the pass count. If CPROP_JUMPS is true, perform constant
3428 propagation into conditional jumps. If BYPASS_JUMPS is true,
3429 perform conditional jump bypassing optimizations. */
3431 static int
3432 one_cprop_pass (int pass, bool cprop_jumps, bool bypass_jumps)
3434 int changed = 0;
3436 global_const_prop_count = local_const_prop_count = 0;
3437 global_copy_prop_count = local_copy_prop_count = 0;
3439 if (cprop_jumps)
3440 local_cprop_pass (cprop_jumps);
3442 /* Determine implicit sets. */
3443 implicit_sets = XCNEWVEC (rtx, last_basic_block);
3444 find_implicit_sets ();
3446 alloc_hash_table (max_cuid, &set_hash_table, 1);
3447 compute_hash_table (&set_hash_table);
3449 /* Free implicit_sets before peak usage. */
3450 free (implicit_sets);
3451 implicit_sets = NULL;
3453 if (dump_file)
3454 dump_hash_table (dump_file, "SET", &set_hash_table);
3455 if (set_hash_table.n_elems > 0)
3457 alloc_cprop_mem (last_basic_block, set_hash_table.n_elems);
3458 compute_cprop_data ();
3459 changed = cprop (cprop_jumps);
3460 if (bypass_jumps)
3461 changed |= bypass_conditional_jumps ();
3462 free_cprop_mem ();
3465 free_hash_table (&set_hash_table);
3467 if (dump_file)
3469 fprintf (dump_file, "CPROP of %s, pass %d: %d bytes needed, ",
3470 current_function_name (), pass, bytes_used);
3471 fprintf (dump_file, "%d local const props, %d local copy props, ",
3472 local_const_prop_count, local_copy_prop_count);
3473 fprintf (dump_file, "%d global const props, %d global copy props\n\n",
3474 global_const_prop_count, global_copy_prop_count);
3476 /* Global analysis may get into infinite loops for unreachable blocks. */
3477 if (changed && cprop_jumps)
3478 delete_unreachable_blocks ();
3480 return changed;
3483 /* Bypass conditional jumps. */
3485 /* The value of last_basic_block at the beginning of the jump_bypass
3486 pass. The use of redirect_edge_and_branch_force may introduce new
3487 basic blocks, but the data flow analysis is only valid for basic
3488 block indices less than bypass_last_basic_block. */
3490 static int bypass_last_basic_block;
3492 /* Find a set of REGNO to a constant that is available at the end of basic
3493 block BB. Returns NULL if no such set is found. Based heavily upon
3494 find_avail_set. */
3496 static struct expr *
3497 find_bypass_set (int regno, int bb)
3499 struct expr *result = 0;
3501 for (;;)
3503 rtx src;
3504 struct expr *set = lookup_set (regno, &set_hash_table);
3506 while (set)
3508 if (TEST_BIT (cprop_avout[bb], set->bitmap_index))
3509 break;
3510 set = next_set (regno, set);
3513 if (set == 0)
3514 break;
3516 gcc_assert (GET_CODE (set->expr) == SET);
3518 src = SET_SRC (set->expr);
3519 if (gcse_constant_p (src))
3520 result = set;
3522 if (! REG_P (src))
3523 break;
3525 regno = REGNO (src);
3527 return result;
3531 /* Subroutine of bypass_block that checks whether a pseudo is killed by
3532 any of the instructions inserted on an edge. Jump bypassing places
3533 condition code setters on CFG edges using insert_insn_on_edge. This
3534 function is required to check that our data flow analysis is still
3535 valid prior to commit_edge_insertions. */
3537 static bool
3538 reg_killed_on_edge (const_rtx reg, const_edge e)
3540 rtx insn;
3542 for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
3543 if (INSN_P (insn) && reg_set_p (reg, insn))
3544 return true;
3546 return false;
3549 /* Subroutine of bypass_conditional_jumps that attempts to bypass the given
3550 basic block BB which has more than one predecessor. If not NULL, SETCC
3551 is the first instruction of BB, which is immediately followed by JUMP_INSN
3552 JUMP. Otherwise, SETCC is NULL, and JUMP is the first insn of BB.
3553 Returns nonzero if a change was made.
3555 During the jump bypassing pass, we may place copies of SETCC instructions
3556 on CFG edges. The following routine must be careful to pay attention to
3557 these inserted insns when performing its transformations. */
3559 static int
3560 bypass_block (basic_block bb, rtx setcc, rtx jump)
3562 rtx insn, note;
3563 edge e, edest;
3564 int i, change;
3565 int may_be_loop_header;
3566 unsigned removed_p;
3567 edge_iterator ei;
3569 insn = (setcc != NULL) ? setcc : jump;
3571 /* Determine set of register uses in INSN. */
3572 reg_use_count = 0;
3573 note_uses (&PATTERN (insn), find_used_regs, NULL);
3574 note = find_reg_equal_equiv_note (insn);
3575 if (note)
3576 find_used_regs (&XEXP (note, 0), NULL);
3578 may_be_loop_header = false;
3579 FOR_EACH_EDGE (e, ei, bb->preds)
3580 if (e->flags & EDGE_DFS_BACK)
3582 may_be_loop_header = true;
3583 break;
3586 change = 0;
3587 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
3589 removed_p = 0;
3591 if (e->flags & EDGE_COMPLEX)
3593 ei_next (&ei);
3594 continue;
3597 /* We can't redirect edges from new basic blocks. */
3598 if (e->src->index >= bypass_last_basic_block)
3600 ei_next (&ei);
3601 continue;
3604 /* The irreducible loops created by redirecting edges entering the
3605 loop from outside would decrease the effectiveness of some of the
3606 following optimizations, so prevent this. */
3607 if (may_be_loop_header
3608 && !(e->flags & EDGE_DFS_BACK))
3610 ei_next (&ei);
3611 continue;
3614 for (i = 0; i < reg_use_count; i++)
3616 struct reg_use *reg_used = &reg_use_table[i];
3617 unsigned int regno = REGNO (reg_used->reg_rtx);
3618 basic_block dest, old_dest;
3619 struct expr *set;
3620 rtx src, new;
3622 if (regno >= max_gcse_regno)
3623 continue;
3625 set = find_bypass_set (regno, e->src->index);
3627 if (! set)
3628 continue;
3630 /* Check the data flow is valid after edge insertions. */
3631 if (e->insns.r && reg_killed_on_edge (reg_used->reg_rtx, e))
3632 continue;
3634 src = SET_SRC (pc_set (jump));
3636 if (setcc != NULL)
3637 src = simplify_replace_rtx (src,
3638 SET_DEST (PATTERN (setcc)),
3639 SET_SRC (PATTERN (setcc)));
3641 new = simplify_replace_rtx (src, reg_used->reg_rtx,
3642 SET_SRC (set->expr));
3644 /* Jump bypassing may have already placed instructions on
3645 edges of the CFG. We can't bypass an outgoing edge that
3646 has instructions associated with it, as these insns won't
3647 get executed if the incoming edge is redirected. */
3649 if (new == pc_rtx)
3651 edest = FALLTHRU_EDGE (bb);
3652 dest = edest->insns.r ? NULL : edest->dest;
3654 else if (GET_CODE (new) == LABEL_REF)
3656 dest = BLOCK_FOR_INSN (XEXP (new, 0));
3657 /* Don't bypass edges containing instructions. */
3658 edest = find_edge (bb, dest);
3659 if (edest && edest->insns.r)
3660 dest = NULL;
3662 else
3663 dest = NULL;
3665 /* Avoid unification of the edge with other edges from the original
3666 branch. We would end up emitting the instruction on "both"
3667 edges. */
3669 if (dest && setcc && !CC0_P (SET_DEST (PATTERN (setcc)))
3670 && find_edge (e->src, dest))
3671 dest = NULL;
3673 old_dest = e->dest;
3674 if (dest != NULL
3675 && dest != old_dest
3676 && dest != EXIT_BLOCK_PTR)
3678 redirect_edge_and_branch_force (e, dest);
3680 /* Copy the register setter to the redirected edge.
3681 Don't copy CC0 setters, as CC0 is dead after jump. */
3682 if (setcc)
3684 rtx pat = PATTERN (setcc);
3685 if (!CC0_P (SET_DEST (pat)))
3686 insert_insn_on_edge (copy_insn (pat), e);
3689 if (dump_file != NULL)
3691 fprintf (dump_file, "JUMP-BYPASS: Proved reg %d "
3692 "in jump_insn %d equals constant ",
3693 regno, INSN_UID (jump));
3694 print_rtl (dump_file, SET_SRC (set->expr));
3695 fprintf (dump_file, "\nBypass edge from %d->%d to %d\n",
3696 e->src->index, old_dest->index, dest->index);
3698 change = 1;
3699 removed_p = 1;
3700 break;
3703 if (!removed_p)
3704 ei_next (&ei);
3706 return change;
3709 /* Find basic blocks with more than one predecessor that only contain a
3710 single conditional jump. If the result of the comparison is known at
3711 compile-time from any incoming edge, redirect that edge to the
3712 appropriate target. Returns nonzero if a change was made.
3714 This function is now mis-named, because we also handle indirect jumps. */
3716 static int
3717 bypass_conditional_jumps (void)
3719 basic_block bb;
3720 int changed;
3721 rtx setcc;
3722 rtx insn;
3723 rtx dest;
3725 /* Note we start at block 1. */
3726 if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
3727 return 0;
3729 bypass_last_basic_block = last_basic_block;
3730 mark_dfs_back_edges ();
3732 changed = 0;
3733 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb,
3734 EXIT_BLOCK_PTR, next_bb)
3736 /* Check for more than one predecessor. */
3737 if (!single_pred_p (bb))
3739 setcc = NULL_RTX;
3740 FOR_BB_INSNS (bb, insn)
3741 if (NONJUMP_INSN_P (insn))
3743 if (setcc)
3744 break;
3745 if (GET_CODE (PATTERN (insn)) != SET)
3746 break;
3748 dest = SET_DEST (PATTERN (insn));
3749 if (REG_P (dest) || CC0_P (dest))
3750 setcc = insn;
3751 else
3752 break;
3754 else if (JUMP_P (insn))
3756 if ((any_condjump_p (insn) || computed_jump_p (insn))
3757 && onlyjump_p (insn))
3758 changed |= bypass_block (bb, setcc, insn);
3759 break;
3761 else if (INSN_P (insn))
3762 break;
3766 /* If we bypassed any register setting insns, we inserted a
3767 copy on the redirected edge. These need to be committed. */
3768 if (changed)
3769 commit_edge_insertions ();
3771 return changed;
3774 /* Compute PRE+LCM working variables. */
3776 /* Local properties of expressions. */
3777 /* Nonzero for expressions that are transparent in the block. */
3778 static sbitmap *transp;
3780 /* Nonzero for expressions that are transparent at the end of the block.
3781 This is only zero for expressions killed by an abnormal critical edge
3782 created by a call. */
3783 static sbitmap *transpout;
3785 /* Nonzero for expressions that are computed (available) in the block. */
3786 static sbitmap *comp;
3788 /* Nonzero for expressions that are locally anticipatable in the block. */
3789 static sbitmap *antloc;
3791 /* Nonzero for expressions where this block is an optimal computation
3792 point. */
3793 static sbitmap *pre_optimal;
3795 /* Nonzero for expressions which are redundant in a particular block. */
3796 static sbitmap *pre_redundant;
3798 /* Nonzero for expressions which should be inserted on a specific edge. */
3799 static sbitmap *pre_insert_map;
3801 /* Nonzero for expressions which should be deleted in a specific block. */
3802 static sbitmap *pre_delete_map;
3804 /* Contains the edge_list returned by pre_edge_lcm. */
3805 static struct edge_list *edge_list;
3807 /* Redundant insns. */
3808 static sbitmap pre_redundant_insns;
3810 /* Allocate vars used for PRE analysis. */
3812 static void
3813 alloc_pre_mem (int n_blocks, int n_exprs)
3815 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
3816 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
3817 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
3819 pre_optimal = NULL;
3820 pre_redundant = NULL;
3821 pre_insert_map = NULL;
3822 pre_delete_map = NULL;
3823 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
3825 /* pre_insert and pre_delete are allocated later. */
3828 /* Free vars used for PRE analysis. */
3830 static void
3831 free_pre_mem (void)
3833 sbitmap_vector_free (transp);
3834 sbitmap_vector_free (comp);
3836 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
3838 if (pre_optimal)
3839 sbitmap_vector_free (pre_optimal);
3840 if (pre_redundant)
3841 sbitmap_vector_free (pre_redundant);
3842 if (pre_insert_map)
3843 sbitmap_vector_free (pre_insert_map);
3844 if (pre_delete_map)
3845 sbitmap_vector_free (pre_delete_map);
3847 transp = comp = NULL;
3848 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
3851 /* Top level routine to do the dataflow analysis needed by PRE. */
3853 static void
3854 compute_pre_data (void)
3856 sbitmap trapping_expr;
3857 basic_block bb;
3858 unsigned int ui;
3860 compute_local_properties (transp, comp, antloc, &expr_hash_table);
3861 sbitmap_vector_zero (ae_kill, last_basic_block);
3863 /* Collect expressions which might trap. */
3864 trapping_expr = sbitmap_alloc (expr_hash_table.n_elems);
3865 sbitmap_zero (trapping_expr);
3866 for (ui = 0; ui < expr_hash_table.size; ui++)
3868 struct expr *e;
3869 for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
3870 if (may_trap_p (e->expr))
3871 SET_BIT (trapping_expr, e->bitmap_index);
3874 /* Compute ae_kill for each basic block using:
3876 ~(TRANSP | COMP)
3879 FOR_EACH_BB (bb)
3881 edge e;
3882 edge_iterator ei;
3884 /* If the current block is the destination of an abnormal edge, we
3885 kill all trapping expressions because we won't be able to properly
3886 place the instruction on the edge. So make them neither
3887 anticipatable nor transparent. This is fairly conservative. */
3888 FOR_EACH_EDGE (e, ei, bb->preds)
3889 if (e->flags & EDGE_ABNORMAL)
3891 sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr);
3892 sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr);
3893 break;
3896 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
3897 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
3900 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
3901 ae_kill, &pre_insert_map, &pre_delete_map);
3902 sbitmap_vector_free (antloc);
3903 antloc = NULL;
3904 sbitmap_vector_free (ae_kill);
3905 ae_kill = NULL;
3906 sbitmap_free (trapping_expr);
3909 /* PRE utilities */
3911 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
3912 block BB.
3914 VISITED is a pointer to a working buffer for tracking which BB's have
3915 been visited. It is NULL for the top-level call.
3917 We treat reaching expressions that go through blocks containing the same
3918 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
3919 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3920 2 as not reaching. The intent is to improve the probability of finding
3921 only one reaching expression and to reduce register lifetimes by picking
3922 the closest such expression. */
3924 static int
3925 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr, basic_block bb, char *visited)
3927 edge pred;
3928 edge_iterator ei;
3930 FOR_EACH_EDGE (pred, ei, bb->preds)
3932 basic_block pred_bb = pred->src;
3934 if (pred->src == ENTRY_BLOCK_PTR
3935 /* Has this predecessor already been visited? */
3936 || visited[pred_bb->index])
3937 ;/* Nothing to do. */
3939 /* Does this predecessor generate this expression? */
3940 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
3942 /* Is this the occurrence we're looking for?
3943 Note that there's only one generating occurrence per block
3944 so we just need to check the block number. */
3945 if (occr_bb == pred_bb)
3946 return 1;
3948 visited[pred_bb->index] = 1;
3950 /* Ignore this predecessor if it kills the expression. */
3951 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
3952 visited[pred_bb->index] = 1;
3954 /* Neither gen nor kill. */
3955 else
3957 visited[pred_bb->index] = 1;
3958 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
3959 return 1;
3963 /* All paths have been checked. */
3964 return 0;
3967 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
3968 memory allocated for that function is returned. */
3970 static int
3971 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
3973 int rval;
3974 char *visited = XCNEWVEC (char, last_basic_block);
3976 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
3978 free (visited);
3979 return rval;
3983 /* Given an expr, generate RTL which we can insert at the end of a BB,
3984 or on an edge. Set the block number of any insns generated to
3985 the value of BB. */
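/* A rough sketch of the two cases (hypothetical RTL; reg numbers made up):
   - EXPR->expr = (const_int 42) is a general_operand, so we simply emit
         (set (reg:SI 100) (const_int 42)).
   - EXPR->expr = (plus:SI (reg:SI 58) (const_int 4)) is not, so we emit
         (set (reg:SI 100) (plus:SI (reg:SI 58) (const_int 4)))
     and rely on recognition to supply any CLOBBERs the target needs.
   In both cases (reg:SI 100) stands for EXPR->reaching_reg.  */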
3987 static rtx
3988 process_insert_insn (struct expr *expr)
3990 rtx reg = expr->reaching_reg;
3991 rtx exp = copy_rtx (expr->expr);
3992 rtx pat;
3994 start_sequence ();
3996 /* If the expression is something that's an operand, like a constant,
3997 just copy it to a register. */
3998 if (general_operand (exp, GET_MODE (reg)))
3999 emit_move_insn (reg, exp);
4001 /* Otherwise, make a new insn to compute this expression and make sure the
4002 insn will be recognized (this also adds any needed CLOBBERs). Copy the
4003 expression to make sure we don't have any sharing issues. */
4004 else
4006 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
4008 if (insn_invalid_p (insn))
4009 gcc_unreachable ();
4013 pat = get_insns ();
4014 end_sequence ();
4016 return pat;
4019 /* Add EXPR to the end of basic block BB.
4021 This is used by both PRE and code hoisting.
4023 For PRE, we want to verify that the expr is either transparent
4024 or locally anticipatable in the target block. This check makes
4025 no sense for code hoisting. */
4027 static void
4028 insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre)
4030 rtx insn = BB_END (bb);
4031 rtx new_insn;
4032 rtx reg = expr->reaching_reg;
4033 int regno = REGNO (reg);
4034 rtx pat, pat_end;
4036 pat = process_insert_insn (expr);
4037 gcc_assert (pat && INSN_P (pat));
4039 pat_end = pat;
4040 while (NEXT_INSN (pat_end) != NULL_RTX)
4041 pat_end = NEXT_INSN (pat_end);
4043 /* If the last insn is a jump, insert EXPR in front [taking care to
4044 handle cc0, etc. properly]. Similarly we need to take care of trapping
4045 instructions in the presence of non-call exceptions. */
4047 if (JUMP_P (insn)
4048 || (NONJUMP_INSN_P (insn)
4049 && (!single_succ_p (bb)
4050 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
4052 #ifdef HAVE_cc0
4053 rtx note;
4054 #endif
4055 /* It should always be the case that we can put these instructions
4056 anywhere in the basic block when performing PRE optimizations.
4057 Check this. */
4058 gcc_assert (!NONJUMP_INSN_P (insn) || !pre
4059 || TEST_BIT (antloc[bb->index], expr->bitmap_index)
4060 || TEST_BIT (transp[bb->index], expr->bitmap_index));
4062 /* If this is a jump table, then we can't insert stuff here. Since
4063 we know the previous real insn must be the tablejump, we insert
4064 the new instruction just before the tablejump. */
4065 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4066 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4067 insn = prev_real_insn (insn);
4069 #ifdef HAVE_cc0
4070 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4071 if cc0 isn't set. */
4072 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4073 if (note)
4074 insn = XEXP (note, 0);
4075 else
4077 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4078 if (maybe_cc0_setter
4079 && INSN_P (maybe_cc0_setter)
4080 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4081 insn = maybe_cc0_setter;
4083 #endif
4084 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4085 new_insn = emit_insn_before_noloc (pat, insn, bb);
4088 /* Likewise if the last insn is a call, as will happen in the presence
4089 of exception handling. */
4090 else if (CALL_P (insn)
4091 && (!single_succ_p (bb)
4092 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
4094 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4095 we search backward and place the instructions before the first
4096 parameter is loaded. Do this for all targets for consistency and on the
4097 presumption that we'll get better code elsewhere as well.
4099 It should always be the case that we can put these instructions
4100 anywhere in the basic block when performing PRE optimizations.
4101 Check this. */
4103 gcc_assert (!pre
4104 || TEST_BIT (antloc[bb->index], expr->bitmap_index)
4105 || TEST_BIT (transp[bb->index], expr->bitmap_index));
4107 /* Since different machines initialize their parameter registers
4108 in different orders, assume nothing. Collect the set of all
4109 parameter registers. */
4110 insn = find_first_parameter_load (insn, BB_HEAD (bb));
4112 /* If we found all the parameter loads, then we want to insert
4113 before the first parameter load.
4115 If we did not find all the parameter loads, then we might have
4116 stopped on the head of the block, which could be a CODE_LABEL.
4117 If we inserted before the CODE_LABEL, then we would be putting
4118 the insn in the wrong basic block. In that case, put the insn
4119 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4120 while (LABEL_P (insn)
4121 || NOTE_INSN_BASIC_BLOCK_P (insn))
4122 insn = NEXT_INSN (insn);
4124 new_insn = emit_insn_before_noloc (pat, insn, bb);
4126 else
4127 new_insn = emit_insn_after_noloc (pat, insn, bb);
4129 while (1)
4131 if (INSN_P (pat))
4133 add_label_notes (PATTERN (pat), new_insn);
4134 note_stores (PATTERN (pat), record_set_info, pat);
4136 if (pat == pat_end)
4137 break;
4138 pat = NEXT_INSN (pat);
4141 gcse_create_count++;
4143 if (dump_file)
4145 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
4146 bb->index, INSN_UID (new_insn));
4147 fprintf (dump_file, "copying expression %d to reg %d\n",
4148 expr->bitmap_index, regno);
4152 /* Insert partially redundant expressions on edges in the CFG to make
4153 the expressions fully redundant. */
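/* For intuition (simplified, hypothetical CFG): if "a + b" is computed on
   only one of two paths that join in block J, the LCM solution asks for an
   insertion (PRE_INSERT_MAP) on an edge of the path where it is missing.
   Emitting "reaching_reg = a + b" there makes the computation in J fully
   redundant, so pre_delete can replace it with a copy of reaching_reg.  */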
4155 static int
4156 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
4158 int e, i, j, num_edges, set_size, did_insert = 0;
4159 sbitmap *inserted;
4161 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4162 if it reaches any of the deleted expressions. */
4164 set_size = pre_insert_map[0]->size;
4165 num_edges = NUM_EDGES (edge_list);
4166 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
4167 sbitmap_vector_zero (inserted, num_edges);
4169 for (e = 0; e < num_edges; e++)
4171 int indx;
4172 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
4174 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4176 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4178 for (j = indx; insert && j < (int) expr_hash_table.n_elems; j++, insert >>= 1)
4179 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4181 struct expr *expr = index_map[j];
4182 struct occr *occr;
4184 /* Now look at each deleted occurrence of this expression. */
4185 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4187 if (! occr->deleted_p)
4188 continue;
4190 /* Insert this expression on this edge if it would
4191 reach the deleted occurrence in BB. */
4192 if (!TEST_BIT (inserted[e], j))
4194 rtx insn;
4195 edge eg = INDEX_EDGE (edge_list, e);
4197 /* We can't insert anything on an abnormal and
4198 critical edge, so we insert the insn at the end of
4199 the previous block. There are several alternatives
4200 detailed in Morgan's book, p. 277 (sec. 10.5), for
4201 handling this situation. This one is easiest for
4202 now. */
4204 if (eg->flags & EDGE_ABNORMAL)
4205 insert_insn_end_basic_block (index_map[j], bb, 0);
4206 else
4208 insn = process_insert_insn (index_map[j]);
4209 insert_insn_on_edge (insn, eg);
4212 if (dump_file)
4214 fprintf (dump_file, "PRE/HOIST: edge (%d,%d), ",
4215 bb->index,
4216 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4217 fprintf (dump_file, "copy expression %d\n",
4218 expr->bitmap_index);
4221 update_ld_motion_stores (expr);
4222 SET_BIT (inserted[e], j);
4223 did_insert = 1;
4224 gcse_create_count++;
4231 sbitmap_vector_free (inserted);
4232 return did_insert;
4235 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
4236 Given "old_reg <- expr" (INSN), instead of adding after it
4237 reaching_reg <- old_reg
4238 it's better to do the following:
4239 reaching_reg <- expr
4240 old_reg <- reaching_reg
4241 because this way copy propagation can discover additional PRE
4242 opportunities. But if this fails, we try the old way.
4243 When "expr" is a store, i.e.
4244 given "MEM <- old_reg", instead of adding after it
4245 reaching_reg <- old_reg
4246 it's better to add it before as follows:
4247 reaching_reg <- old_reg
4248 MEM <- reaching_reg. */
4250 static void
4251 pre_insert_copy_insn (struct expr *expr, rtx insn)
4253 rtx reg = expr->reaching_reg;
4254 int regno = REGNO (reg);
4255 int indx = expr->bitmap_index;
4256 rtx pat = PATTERN (insn);
4257 rtx set, first_set, new_insn;
4258 rtx old_reg;
4259 int i;
4261 /* This block matches the logic in hash_scan_insn. */
4262 switch (GET_CODE (pat))
4264 case SET:
4265 set = pat;
4266 break;
4268 case PARALLEL:
4269 /* Search through the parallel looking for the set whose
4270 source was the expression that we're interested in. */
4271 first_set = NULL_RTX;
4272 set = NULL_RTX;
4273 for (i = 0; i < XVECLEN (pat, 0); i++)
4275 rtx x = XVECEXP (pat, 0, i);
4276 if (GET_CODE (x) == SET)
4278 /* If the source was a REG_EQUAL or REG_EQUIV note, we
4279 may not find an equivalent expression, but in this
4280 case the PARALLEL will have a single set. */
4281 if (first_set == NULL_RTX)
4282 first_set = x;
4283 if (expr_equiv_p (SET_SRC (x), expr->expr))
4285 set = x;
4286 break;
4291 gcc_assert (first_set);
4292 if (set == NULL_RTX)
4293 set = first_set;
4294 break;
4296 default:
4297 gcc_unreachable ();
4300 if (REG_P (SET_DEST (set)))
4302 old_reg = SET_DEST (set);
4303 /* Check if we can modify the set destination in the original insn. */
4304 if (validate_change (insn, &SET_DEST (set), reg, 0))
4306 new_insn = gen_move_insn (old_reg, reg);
4307 new_insn = emit_insn_after (new_insn, insn);
4309 /* Keep register set table up to date. */
4310 record_one_set (regno, insn);
4312 else
4314 new_insn = gen_move_insn (reg, old_reg);
4315 new_insn = emit_insn_after (new_insn, insn);
4317 /* Keep register set table up to date. */
4318 record_one_set (regno, new_insn);
4321 else /* This is possible only in case of a store to memory. */
4323 old_reg = SET_SRC (set);
4324 new_insn = gen_move_insn (reg, old_reg);
4326 /* Check if we can modify the set source in the original insn. */
4327 if (validate_change (insn, &SET_SRC (set), reg, 0))
4328 new_insn = emit_insn_before (new_insn, insn);
4329 else
4330 new_insn = emit_insn_after (new_insn, insn);
4332 /* Keep register set table up to date. */
4333 record_one_set (regno, new_insn);
4336 gcse_create_count++;
4338 if (dump_file)
4339 fprintf (dump_file,
4340 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4341 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4342 INSN_UID (insn), regno);
4345 /* Copy available expressions that reach the redundant expression
4346 to `reaching_reg'. */
4348 static void
4349 pre_insert_copies (void)
4351 unsigned int i, added_copy;
4352 struct expr *expr;
4353 struct occr *occr;
4354 struct occr *avail;
4356 /* For each available expression in the table, copy the result to
4357 `reaching_reg' if the expression reaches a deleted one.
4359 ??? The current algorithm is rather brute force.
4360 Need to do some profiling. */
4362 for (i = 0; i < expr_hash_table.size; i++)
4363 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4365 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4366 we don't want to insert a copy here because the expression may not
4367 really be redundant. So only insert an insn if the expression was
4368 deleted. This test also avoids further processing if the
4369 expression wasn't deleted anywhere. */
4370 if (expr->reaching_reg == NULL)
4371 continue;
4373 /* Set when we add a copy for that expression. */
4374 added_copy = 0;
4376 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4378 if (! occr->deleted_p)
4379 continue;
4381 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4383 rtx insn = avail->insn;
4385 /* No need to handle this one if handled already. */
4386 if (avail->copied_p)
4387 continue;
4389 /* Don't handle this one if it's a redundant one. */
4390 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4391 continue;
4393 /* Or if the expression doesn't reach the deleted one. */
4394 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
4395 expr,
4396 BLOCK_FOR_INSN (occr->insn)))
4397 continue;
4399 added_copy = 1;
4401 /* Copy the result of avail to reaching_reg. */
4402 pre_insert_copy_insn (expr, insn);
4403 avail->copied_p = 1;
4407 if (added_copy)
4408 update_ld_motion_stores (expr);
4412 /* Emit a move from SRC to DEST, noting the equivalence with the expression computed
4413 in INSN. */
4414 static rtx
4415 gcse_emit_move_after (rtx src, rtx dest, rtx insn)
4417 rtx new;
4418 rtx set = single_set (insn), set2;
4419 rtx note;
4420 rtx eqv;
4422 /* This should never fail since we're creating a reg->reg copy
4423 we've verified to be valid. */
4425 new = emit_insn_after (gen_move_insn (dest, src), insn);
4427 /* Note the equivalence for local CSE pass. */
4428 set2 = single_set (new);
4429 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
4430 return new;
4431 if ((note = find_reg_equal_equiv_note (insn)))
4432 eqv = XEXP (note, 0);
4433 else
4434 eqv = SET_SRC (set);
4436 set_unique_reg_note (new, REG_EQUAL, copy_insn_1 (eqv));
4438 return new;
4441 /* Delete redundant computations.
4442 Deletion is done by changing the insn to copy the `reaching_reg' of
4443 the expression into the result of the SET. It is left to later passes
4444 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4446 Returns nonzero if a change is made. */
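/* For example (hypothetical RTL): a partially redundant

       (set (reg:SI 60) (plus:SI (reg:SI 58) (const_int 4)))

   becomes

       (set (reg:SI 60) (reg:SI 100))

   where (reg:SI 100) is the expression's reaching_reg; edge insertions and
   the copies added by pre_insert_copies ensure reg 100 holds the value of
   the expression on every path into the block.  */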
4448 static int
4449 pre_delete (void)
4451 unsigned int i;
4452 int changed;
4453 struct expr *expr;
4454 struct occr *occr;
4456 changed = 0;
4457 for (i = 0; i < expr_hash_table.size; i++)
4458 for (expr = expr_hash_table.table[i];
4459 expr != NULL;
4460 expr = expr->next_same_hash)
4462 int indx = expr->bitmap_index;
4464 /* We only need to search antic_occr since we require
4465 ANTLOC != 0. */
4467 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4469 rtx insn = occr->insn;
4470 rtx set;
4471 basic_block bb = BLOCK_FOR_INSN (insn);
4473 /* We only delete insns that have a single_set. */
4474 if (TEST_BIT (pre_delete_map[bb->index], indx)
4475 && (set = single_set (insn)) != 0
4476 && dbg_cnt (pre_insn))
4478 /* Create a pseudo-reg to store the result of reaching
4479 expressions into. Get the mode for the new pseudo from
4480 the mode of the original destination pseudo. */
4481 if (expr->reaching_reg == NULL)
4482 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
4484 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
4485 delete_insn (insn);
4486 occr->deleted_p = 1;
4487 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4488 changed = 1;
4489 gcse_subst_count++;
4491 if (dump_file)
4493 fprintf (dump_file,
4494 "PRE: redundant insn %d (expression %d) in ",
4495 INSN_UID (insn), indx);
4496 fprintf (dump_file, "bb %d, reaching reg is %d\n",
4497 bb->index, REGNO (expr->reaching_reg));
4503 return changed;
4506 /* Perform GCSE optimizations using PRE.
4507 This is called by one_pre_gcse_pass after all the dataflow analysis
4508 has been done.
4510 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
4511 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4512 Compiler Design and Implementation.
4514 ??? A new pseudo reg is created to hold the reaching expression. The nice
4515 thing about the classical approach is that it would try to use an existing
4516 reg. If the register can't be adequately optimized [i.e. we introduce
4517 reload problems], one could add a pass here to propagate the new register
4518 through the block.
4520 ??? We don't handle single sets in PARALLELs because we're [currently] not
4521 able to copy the rest of the parallel when we insert copies to create full
4522 redundancies from partial redundancies. However, there's no reason why we
4523 can't handle PARALLELs in the cases where there are no partial
4524 redundancies. */
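/* In outline, pre_gcse below: (1) builds INDEX_MAP from bitmap_index to
   hash table entry, (2) calls pre_delete to turn redundant computations
   into copies from reaching_reg, (3) calls pre_edge_insert to add the
   expressions on the edges chosen by LCM, (4) calls pre_insert_copies, and
   (5) commits any pending edge insertions.  */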
4526 static int
4527 pre_gcse (void)
4529 unsigned int i;
4530 int did_insert, changed;
4531 struct expr **index_map;
4532 struct expr *expr;
4534 /* Compute a mapping from expression number (`bitmap_index') to
4535 hash table entry. */
4537 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
4538 for (i = 0; i < expr_hash_table.size; i++)
4539 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4540 index_map[expr->bitmap_index] = expr;
4542 /* Reset bitmap used to track which insns are redundant. */
4543 pre_redundant_insns = sbitmap_alloc (max_cuid);
4544 sbitmap_zero (pre_redundant_insns);
4546 /* Delete the redundant insns first so that
4547 - we know what register to use for the new insns and for the other
4548 ones with reaching expressions
4549 - we know which insns are redundant when we go to create copies */
4551 changed = pre_delete ();
4552 did_insert = pre_edge_insert (edge_list, index_map);
4554 /* In other places with reaching expressions, copy the expression to the
4555 specially allocated pseudo-reg that reaches the redundant expr. */
4556 pre_insert_copies ();
4557 if (did_insert)
4559 commit_edge_insertions ();
4560 changed = 1;
4563 free (index_map);
4564 sbitmap_free (pre_redundant_insns);
4565 return changed;
4568 /* Top level routine to perform one PRE GCSE pass.
4570 Return nonzero if a change was made. */
4572 static int
4573 one_pre_gcse_pass (int pass)
4575 int changed = 0;
4577 gcse_subst_count = 0;
4578 gcse_create_count = 0;
4580 alloc_hash_table (max_cuid, &expr_hash_table, 0);
4581 add_noreturn_fake_exit_edges ();
4582 if (flag_gcse_lm)
4583 compute_ld_motion_mems ();
4585 compute_hash_table (&expr_hash_table);
4586 trim_ld_motion_mems ();
4587 if (dump_file)
4588 dump_hash_table (dump_file, "Expression", &expr_hash_table);
4590 if (expr_hash_table.n_elems > 0)
4592 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
4593 compute_pre_data ();
4594 changed |= pre_gcse ();
4595 free_edge_list (edge_list);
4596 free_pre_mem ();
4599 free_ldst_mems ();
4600 remove_fake_exit_edges ();
4601 free_hash_table (&expr_hash_table);
4603 if (dump_file)
4605 fprintf (dump_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4606 current_function_name (), pass, bytes_used);
4607 fprintf (dump_file, "%d substs, %d insns created\n",
4608 gcse_subst_count, gcse_create_count);
4611 return changed;
4614 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
4615 to INSN. If such notes are added to an insn which references a
4616 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
4617 these notes, because the following loop optimization pass requires
4618 them. */
4620 /* ??? If there was a jump optimization pass after gcse and before loop,
4621 then we would not need to do this here, because jump would add the
4622 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
4624 static void
4625 add_label_notes (rtx x, rtx insn)
4627 enum rtx_code code = GET_CODE (x);
4628 int i, j;
4629 const char *fmt;
4631 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4633 /* This code used to ignore labels that referred to dispatch tables to
4634 avoid flow generating (slightly) worse code.
4636 We no longer ignore such label references (see LABEL_REF handling in
4637 mark_jump_label for additional information). */
4639 /* There's no reason for current users to emit jump-insns with
4640 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
4641 notes. */
4642 gcc_assert (!JUMP_P (insn));
4643 REG_NOTES (insn)
4644 = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, XEXP (x, 0),
4645 REG_NOTES (insn));
4646 if (LABEL_P (XEXP (x, 0)))
4647 LABEL_NUSES (XEXP (x, 0))++;
4649 return;
4652 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4654 if (fmt[i] == 'e')
4655 add_label_notes (XEXP (x, i), insn);
4656 else if (fmt[i] == 'E')
4657 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4658 add_label_notes (XVECEXP (x, i, j), insn);
4662 /* Compute transparent outgoing information for each block.
4664 An expression is transparent to an edge unless it is killed by
4665 the edge itself. This can only happen with abnormal control flow,
4666 when the edge is traversed through a call. This happens with
4667 non-local labels and exceptions.
4669 This would not be necessary if we split the edge. While this is
4670 normally impossible for abnormal critical edges, with some effort
4671 it should be possible with exception handling, since we still have
4672 control over which handler should be invoked. But due to increased
4673 EH table sizes, this may not be worthwhile. */
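/* Illustrative case (hypothetical): for a block ending in "call foo ()",
   TRANSPOUT is cleared for an expression such as (mem:SI (reg:SI 58)),
   since foo might store through that address along an abnormal edge,
   whereas a constant pool reference like (mem:SI (symbol_ref ".LC0"))
   with CONSTANT_POOL_ADDRESS_P set remains transparent.  */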
4675 static void
4676 compute_transpout (void)
4678 basic_block bb;
4679 unsigned int i;
4680 struct expr *expr;
4682 sbitmap_vector_ones (transpout, last_basic_block);
4684 FOR_EACH_BB (bb)
4686 /* Note that flow inserted a nop at the end of basic blocks that
4687 end in call instructions for reasons other than abnormal
4688 control flow. */
4689 if (! CALL_P (BB_END (bb)))
4690 continue;
4692 for (i = 0; i < expr_hash_table.size; i++)
4693 for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash)
4694 if (MEM_P (expr->expr))
4696 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4697 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4698 continue;
4700 /* ??? Optimally, we would use interprocedural alias
4701 analysis to determine if this mem is actually killed
4702 by this call. */
4703 RESET_BIT (transpout[bb->index], expr->bitmap_index);
4708 /* Code Hoisting variables and subroutines. */
4710 /* Very busy expressions. */
4711 static sbitmap *hoist_vbein;
4712 static sbitmap *hoist_vbeout;
4714 /* Hoistable expressions. */
4715 static sbitmap *hoist_exprs;
4717 /* ??? We could compute post dominators and run this algorithm in
4718 reverse to perform tail merging; doing so would probably be
4719 more effective than the tail merging code in jump.c.
4721 It's unclear if tail merging could be run in parallel with
4722 code hoisting. It would be nice. */
4724 /* Allocate vars used for code hoisting analysis. */
4726 static void
4727 alloc_code_hoist_mem (int n_blocks, int n_exprs)
4729 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4730 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4731 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4733 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
4734 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
4735 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
4736 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4739 /* Free vars used for code hoisting analysis. */
4741 static void
4742 free_code_hoist_mem (void)
4744 sbitmap_vector_free (antloc);
4745 sbitmap_vector_free (transp);
4746 sbitmap_vector_free (comp);
4748 sbitmap_vector_free (hoist_vbein);
4749 sbitmap_vector_free (hoist_vbeout);
4750 sbitmap_vector_free (hoist_exprs);
4751 sbitmap_vector_free (transpout);
4753 free_dominance_info (CDI_DOMINATORS);
4756 /* Compute the very busy expressions at entry/exit from each block.
4758 An expression is very busy if all paths from a given point
4759 compute the expression. */
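/* Small example (hypothetical): if every successor of block BB evaluates
   a + b before either a or b is redefined, then a + b is very busy at the
   exit of BB (hoist_vbeout), which makes BB a candidate place to compute
   it once for all of those successors.  */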
4761 static void
4762 compute_code_hoist_vbeinout (void)
4764 int changed, passes;
4765 basic_block bb;
4767 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
4768 sbitmap_vector_zero (hoist_vbein, last_basic_block);
4770 passes = 0;
4771 changed = 1;
4773 while (changed)
4775 changed = 0;
4777 /* We scan the blocks in the reverse order to speed up
4778 the convergence. */
4779 FOR_EACH_BB_REVERSE (bb)
4781 if (bb->next_bb != EXIT_BLOCK_PTR)
4782 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
4783 hoist_vbein, bb->index);
4785 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
4786 antloc[bb->index],
4787 hoist_vbeout[bb->index],
4788 transp[bb->index]);
4791 passes++;
4794 if (dump_file)
4795 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
4798 /* Top level routine to do the dataflow analysis needed by code hoisting. */
4800 static void
4801 compute_code_hoist_data (void)
4803 compute_local_properties (transp, comp, antloc, &expr_hash_table);
4804 compute_transpout ();
4805 compute_code_hoist_vbeinout ();
4806 calculate_dominance_info (CDI_DOMINATORS);
4807 if (dump_file)
4808 fprintf (dump_file, "\n");
4811 /* Determine if the expression identified by EXPR_INDEX would
4812 reach BB unimpaired if it was placed at the end of EXPR_BB.
4814 It's unclear exactly what Muchnick meant by "unimpaired". It seems
4815 to me that the expression must either be computed or transparent in
4816 *every* block in the path(s) from EXPR_BB to BB. Any other definition
4817 would allow the expression to be hoisted out of loops, even if
4818 the expression wasn't a loop invariant.
4820 Contrast this to reachability for PRE where an expression is
4821 considered reachable if *any* path reaches instead of *all*
4822 paths. */
4824 static int
4825 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited)
4827 edge pred;
4828 edge_iterator ei;
4829 int visited_allocated_locally = 0;
4832 if (visited == NULL)
4834 visited_allocated_locally = 1;
4835 visited = XCNEWVEC (char, last_basic_block);
4838 FOR_EACH_EDGE (pred, ei, bb->preds)
4840 basic_block pred_bb = pred->src;
4842 if (pred->src == ENTRY_BLOCK_PTR)
4843 break;
4844 else if (pred_bb == expr_bb)
4845 continue;
4846 else if (visited[pred_bb->index])
4847 continue;
4849 /* Does this predecessor generate this expression? */
4850 else if (TEST_BIT (comp[pred_bb->index], expr_index))
4851 break;
4852 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
4853 break;
4855 /* Not killed. */
4856 else
4858 visited[pred_bb->index] = 1;
4859 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
4860 pred_bb, visited))
4861 break;
4864 if (visited_allocated_locally)
4865 free (visited);
4867 return (pred == NULL);
4870 /* Actually perform code hoisting. */
4872 static void
4873 hoist_code (void)
4875 basic_block bb, dominated;
4876 VEC (basic_block, heap) *domby;
4877 unsigned int i,j;
4878 struct expr **index_map;
4879 struct expr *expr;
4881 sbitmap_vector_zero (hoist_exprs, last_basic_block);
4883 /* Compute a mapping from expression number (`bitmap_index') to
4884 hash table entry. */
4886 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
4887 for (i = 0; i < expr_hash_table.size; i++)
4888 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4889 index_map[expr->bitmap_index] = expr;
4891 /* Walk over each basic block looking for potentially hoistable
4892 expressions; nothing gets hoisted from the entry block. */
4893 FOR_EACH_BB (bb)
4895 int found = 0;
4896 int insn_inserted_p;
4898 domby = get_dominated_by (CDI_DOMINATORS, bb);
4899 /* Examine each expression that is very busy at the exit of this
4900 block. These are the potentially hoistable expressions. */
4901 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
4903 int hoistable = 0;
4905 if (TEST_BIT (hoist_vbeout[bb->index], i)
4906 && TEST_BIT (transpout[bb->index], i))
4908 /* We've found a potentially hoistable expression, now
4909 we look at every block BB dominates to see if it
4910 computes the expression. */
4911 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4913 /* Ignore self dominance. */
4914 if (bb == dominated)
4915 continue;
4916 /* We've found a dominated block, now see if it computes
4917 the busy expression and whether or not moving that
4918 expression to the "beginning" of that block is safe. */
4919 if (!TEST_BIT (antloc[dominated->index], i))
4920 continue;
4922 /* Note if the expression would reach the dominated block
4923 unimpaired if it was placed at the end of BB.
4925 Keep track of how many times this expression is hoistable
4926 from a dominated block into BB. */
4927 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
4928 hoistable++;
4931 /* If we found more than one hoistable occurrence of this
4932 expression, then note it in the bitmap of expressions to
4933 hoist. It makes no sense to hoist things which are computed
4934 in only one BB, and doing so tends to pessimize register
4935 allocation. One could increase this value to try harder
4936 to avoid any possible code expansion due to register
4937 allocation issues; however experiments have shown that
4938 the vast majority of hoistable expressions are only movable
4939 from two successors, so raising this threshold is likely
4940 to nullify any benefit we get from code hoisting. */
4941 if (hoistable > 1)
4943 SET_BIT (hoist_exprs[bb->index], i);
4944 found = 1;
4948 /* If we found nothing to hoist, then quit now. */
4949 if (! found)
4951 VEC_free (basic_block, heap, domby);
4952 continue;
4955 /* Loop over all the hoistable expressions. */
4956 for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
4958 /* We want to insert the expression into BB only once, so
4959 note when we've inserted it. */
4960 insn_inserted_p = 0;
4962 /* These tests should be the same as the tests above. */
4963 if (TEST_BIT (hoist_exprs[bb->index], i))
4965 /* We've found a potentially hoistable expression, now
4966 we look at every block BB dominates to see if it
4967 computes the expression. */
4968 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4970 /* Ignore self dominance. */
4971 if (bb == dominated)
4972 continue;
4974 /* We've found a dominated block, now see if it computes
4975 the busy expression and whether or not moving that
4976 expression to the "beginning" of that block is safe. */
4977 if (!TEST_BIT (antloc[dominated->index], i))
4978 continue;
4980 /* The expression is computed in the dominated block and
4981 it would be safe to compute it at the start of the
4982 dominated block. Now we have to determine if the
4983 expression would reach the dominated block if it was
4984 placed at the end of BB. */
4985 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
4987 struct expr *expr = index_map[i];
4988 struct occr *occr = expr->antic_occr;
4989 rtx insn;
4990 rtx set;
4992 /* Find the right occurrence of this expression. */
4993 while (occr && BLOCK_FOR_INSN (occr->insn) != dominated)
4994 occr = occr->next;
4996 gcc_assert (occr);
4997 insn = occr->insn;
4998 set = single_set (insn);
4999 gcc_assert (set);
5001 /* Create a pseudo-reg to store the result of reaching
5002 expressions into. Get the mode for the new pseudo
5003 from the mode of the original destination pseudo. */
5004 if (expr->reaching_reg == NULL)
5005 expr->reaching_reg
5006 = gen_reg_rtx_and_attrs (SET_DEST (set));
5008 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
5009 delete_insn (insn);
5010 occr->deleted_p = 1;
5011 if (!insn_inserted_p)
5013 insert_insn_end_basic_block (index_map[i], bb, 0);
5014 insn_inserted_p = 1;
5020 VEC_free (basic_block, heap, domby);
5023 free (index_map);
5026 /* Top level routine to perform one code hoisting (aka unification) pass
5028 Return nonzero if a change was made. */
5030 static int
5031 one_code_hoisting_pass (void)
5033 int changed = 0;
5035 alloc_hash_table (max_cuid, &expr_hash_table, 0);
5036 compute_hash_table (&expr_hash_table);
5037 if (dump_file)
5038 dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
5040 if (expr_hash_table.n_elems > 0)
5042 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
5043 compute_code_hoist_data ();
5044 hoist_code ();
5045 free_code_hoist_mem ();
5048 free_hash_table (&expr_hash_table);
5050 return changed;
5053 /* Here we provide the things required to do store motion towards
5054 the exit. In order for this to be effective, gcse also needed to
5055 be taught how to move a load when it is killed only by a store to itself.
5057 int i;
5058 float a[10];
5060 void foo(float scale)
5062 for (i=0; i<10; i++)
5063 a[i] *= scale;
5066 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
5067 the load out since it's live around the loop, and stored at the bottom
5068 of the loop.
5070 The 'Load Motion' referred to and implemented in this file is
5071 an enhancement to gcse which when using edge based lcm, recognizes
5072 this situation and allows gcse to move the load out of the loop.
5074 Once gcse has hoisted the load, store motion can then push this
5075 load towards the exit, and we end up with no loads or stores of 'i'
5076 in the loop. */
5078 static hashval_t
5079 pre_ldst_expr_hash (const void *p)
5081 int do_not_record_p = 0;
5082 const struct ls_expr *x = p;
5083 return hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
5086 static int
5087 pre_ldst_expr_eq (const void *p1, const void *p2)
5089 const struct ls_expr *ptr1 = p1, *ptr2 = p2;
5090 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
5093 /* This will search the ldst list for a matching expression. If it
5094 doesn't find one, we create one and initialize it. */
5096 static struct ls_expr *
5097 ldst_entry (rtx x)
5099 int do_not_record_p = 0;
5100 struct ls_expr * ptr;
5101 unsigned int hash;
5102 void **slot;
5103 struct ls_expr e;
5105 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
5106 NULL, /*have_reg_qty=*/false);
5108 e.pattern = x;
5109 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
5110 if (*slot)
5111 return (struct ls_expr *)*slot;
5113 ptr = XNEW (struct ls_expr);
5115 ptr->next = pre_ldst_mems;
5116 ptr->expr = NULL;
5117 ptr->pattern = x;
5118 ptr->pattern_regs = NULL_RTX;
5119 ptr->loads = NULL_RTX;
5120 ptr->stores = NULL_RTX;
5121 ptr->reaching_reg = NULL_RTX;
5122 ptr->invalid = 0;
5123 ptr->index = 0;
5124 ptr->hash_index = hash;
5125 pre_ldst_mems = ptr;
5126 *slot = ptr;
5128 return ptr;
5131 /* Free up an individual ldst entry. */
5133 static void
5134 free_ldst_entry (struct ls_expr * ptr)
5136 free_INSN_LIST_list (& ptr->loads);
5137 free_INSN_LIST_list (& ptr->stores);
5139 free (ptr);
5142 /* Free up all memory associated with the ldst list. */
5144 static void
5145 free_ldst_mems (void)
5147 if (pre_ldst_table)
5148 htab_delete (pre_ldst_table);
5149 pre_ldst_table = NULL;
5151 while (pre_ldst_mems)
5153 struct ls_expr * tmp = pre_ldst_mems;
5155 pre_ldst_mems = pre_ldst_mems->next;
5157 free_ldst_entry (tmp);
5160 pre_ldst_mems = NULL;
5163 /* Dump debugging info about the ldst list. */
5165 static void
5166 print_ldst_list (FILE * file)
5168 struct ls_expr * ptr;
5170 fprintf (file, "LDST list: \n");
5172 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
5174 fprintf (file, " Pattern (%3d): ", ptr->index);
5176 print_rtl (file, ptr->pattern);
5178 fprintf (file, "\n Loads : ");
5180 if (ptr->loads)
5181 print_rtl (file, ptr->loads);
5182 else
5183 fprintf (file, "(nil)");
5185 fprintf (file, "\n Stores : ");
5187 if (ptr->stores)
5188 print_rtl (file, ptr->stores);
5189 else
5190 fprintf (file, "(nil)");
5192 fprintf (file, "\n\n");
5195 fprintf (file, "\n");
5198 /* Return the ldst list entry whose pattern matches X, or NULL if there is none or it has been invalidated. */
5200 static struct ls_expr *
5201 find_rtx_in_ldst (rtx x)
5203 struct ls_expr e;
5204 void **slot;
5205 if (!pre_ldst_table)
5206 return NULL;
5207 e.pattern = x;
5208 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
5209 if (!slot || ((struct ls_expr *)*slot)->invalid)
5210 return NULL;
5211 return *slot;
5214 /* Assign each element of the list of mems a monotonically increasing value. */
5216 static int
5217 enumerate_ldsts (void)
5219 struct ls_expr * ptr;
5220 int n = 0;
5222 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
5223 ptr->index = n++;
5225 return n;
5228 /* Return first item in the list. */
5230 static inline struct ls_expr *
5231 first_ls_expr (void)
5233 return pre_ldst_mems;
5236 /* Return the next item in the list after the specified one. */
5238 static inline struct ls_expr *
5239 next_ls_expr (struct ls_expr * ptr)
5241 return ptr->next;
5244 /* Load Motion for loads which only kill themselves. */
5246 /* Return true if x is a simple MEM operation, with no registers or
5247 side effects. These are the types of loads we consider for the
5248 ld_motion list, otherwise we let the usual aliasing take care of it. */
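/* As a sketch of the filter: (mem:SI (symbol_ref ("i"))) is accepted,
   while a volatile MEM, a BLKmode MEM, a MEM whose address mentions the
   stack pointer, or (under -fnon-call-exceptions) a MEM that may trap is
   rejected and left to the usual aliasing handling.  */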
5250 static int
5251 simple_mem (const_rtx x)
5253 if (! MEM_P (x))
5254 return 0;
5256 if (MEM_VOLATILE_P (x))
5257 return 0;
5259 if (GET_MODE (x) == BLKmode)
5260 return 0;
5262 /* If we are handling exceptions, we must be careful with memory references
5263 that may trap. If we are not, the behavior is undefined, so we may just
5264 continue. */
5265 if (flag_non_call_exceptions && may_trap_p (x))
5266 return 0;
5268 if (side_effects_p (x))
5269 return 0;
5271 /* Do not consider function arguments passed on the stack. */
5272 if (reg_mentioned_p (stack_pointer_rtx, x))
5273 return 0;
5275 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
5276 return 0;
5278 return 1;
5281 /* Make sure there isn't a buried reference in this pattern anywhere.
5282 If there is, invalidate the entry for it since we're not capable
5283 of fixing it up just yet. We have to be sure we know about ALL
5284 loads since the aliasing code will allow all entries in the
5285 ld_motion list to not-alias itself. If we miss a load, we will get
5286 the wrong value since gcse might common it and we won't know to
5287 fix it up. */
5289 static void
5290 invalidate_any_buried_refs (rtx x)
5292 const char * fmt;
5293 int i, j;
5294 struct ls_expr * ptr;
5296 /* Invalidate it in the list. */
5297 if (MEM_P (x) && simple_mem (x))
5299 ptr = ldst_entry (x);
5300 ptr->invalid = 1;
5303 /* Recursively process the insn. */
5304 fmt = GET_RTX_FORMAT (GET_CODE (x));
5306 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5308 if (fmt[i] == 'e')
5309 invalidate_any_buried_refs (XEXP (x, i));
5310 else if (fmt[i] == 'E')
5311 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5312 invalidate_any_buried_refs (XVECEXP (x, i, j));
5316 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
5317 being defined as MEM loads and stores to symbols, with no side effects
5318 and no registers in the expression. For a MEM destination, we also
5319 check that the insn is still valid if we replace the destination with a
5320 REG, as is done in update_ld_motion_stores. If there are any uses/defs
5321 which don't match these criteria, they are invalidated and trimmed out
5322 later. */
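/* For instance (hypothetical insns):
       (set (reg:SI 60) (mem:SI (symbol_ref ("x"))))
   is recorded on the entry's loads list, and
       (set (mem:SI (symbol_ref ("x"))) (reg:SI 61))
   on its stores list, while a reference to the same MEM buried inside some
   other pattern marks the entry invalid (invalidate_any_buried_refs) so
   that trim_ld_motion_mems drops it.  */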
5324 static void
5325 compute_ld_motion_mems (void)
5327 struct ls_expr * ptr;
5328 basic_block bb;
5329 rtx insn;
5331 pre_ldst_mems = NULL;
5332 pre_ldst_table = htab_create (13, pre_ldst_expr_hash,
5333 pre_ldst_expr_eq, NULL);
5335 FOR_EACH_BB (bb)
5337 FOR_BB_INSNS (bb, insn)
5339 if (INSN_P (insn))
5341 if (GET_CODE (PATTERN (insn)) == SET)
5343 rtx src = SET_SRC (PATTERN (insn));
5344 rtx dest = SET_DEST (PATTERN (insn));
5346 /* Check for a simple LOAD... */
5347 if (MEM_P (src) && simple_mem (src))
5349 ptr = ldst_entry (src);
5350 if (REG_P (dest))
5351 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
5352 else
5353 ptr->invalid = 1;
5355 else
5357 /* Make sure there isn't a buried load somewhere. */
5358 invalidate_any_buried_refs (src);
5361 /* Check for stores. Don't worry about aliased ones, they
5362 will block any movement we might do later. We only care
5363 about this exact pattern since those are the only
5364 circumstance that we will ignore the aliasing info. */
5365 if (MEM_P (dest) && simple_mem (dest))
5367 ptr = ldst_entry (dest);
5369 if (! MEM_P (src)
5370 && GET_CODE (src) != ASM_OPERANDS
5371 /* Check for REG manually since want_to_gcse_p
5372 returns 0 for all REGs. */
5373 && can_assign_to_reg_p (src))
5374 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
5375 else
5376 ptr->invalid = 1;
5379 else
5380 invalidate_any_buried_refs (PATTERN (insn));
5386 /* Remove any references that have been either invalidated or are not in the
5387 expression list for pre gcse. */
5389 static void
5390 trim_ld_motion_mems (void)
5392 struct ls_expr * * last = & pre_ldst_mems;
5393 struct ls_expr * ptr = pre_ldst_mems;
5395 while (ptr != NULL)
5397 struct expr * expr;
5399 /* Delete if entry has been made invalid. */
5400 if (! ptr->invalid)
5402 /* Delete if we cannot find this mem in the expression list. */
5403 unsigned int hash = ptr->hash_index % expr_hash_table.size;
5405 for (expr = expr_hash_table.table[hash];
5406 expr != NULL;
5407 expr = expr->next_same_hash)
5408 if (expr_equiv_p (expr->expr, ptr->pattern))
5409 break;
5411 else
5412 expr = (struct expr *) 0;
5414 if (expr)
5416 /* Set the expression field if we are keeping it. */
5417 ptr->expr = expr;
5418 last = & ptr->next;
5419 ptr = ptr->next;
5421 else
5423 *last = ptr->next;
5424 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
5425 free_ldst_entry (ptr);
5426 ptr = * last;
5430 /* Show the world what we've found. */
5431 if (dump_file && pre_ldst_mems != NULL)
5432 print_ldst_list (dump_file);
5435 /* This routine will take an expression which we are replacing with
5436 a reaching register, and update any stores that are needed if
5437 that expression is in the ld_motion list. Stores are updated by
5438 copying their SRC to the reaching register, and then storing
5439 the reaching register into the store location. This keeps the
5440 correct value in the reaching register for the loads. */
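/* Concretely (hypothetical RTL): a recorded store
       (set (mem:SI (symbol_ref ("i"))) (reg:SI 61))
   becomes the pair
       (set (reg:SI 100) (reg:SI 61))
       (set (mem:SI (symbol_ref ("i"))) (reg:SI 100))
   where (reg:SI 100) is the load's reaching register.  */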
5442 static void
5443 update_ld_motion_stores (struct expr * expr)
5445 struct ls_expr * mem_ptr;
5447 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
5449 /* We could try to find just the REACHED stores, but it shouldn't
5450 matter if we set the reaching reg everywhere... some might be
5451 dead and should be eliminated later. */
5453 /* We replace (set mem expr) with (set reg expr) (set mem reg)
5454 where reg is the reaching reg used in the load. We checked in
5455 compute_ld_motion_mems that we can replace (set mem expr) with
5456 (set reg expr) in that insn. */
5457 rtx list = mem_ptr->stores;
5459 for ( ; list != NULL_RTX; list = XEXP (list, 1))
5461 rtx insn = XEXP (list, 0);
5462 rtx pat = PATTERN (insn);
5463 rtx src = SET_SRC (pat);
5464 rtx reg = expr->reaching_reg;
5465 rtx copy, new;
5467 /* If we've already copied it, continue. */
5468 if (expr->reaching_reg == src)
5469 continue;
5471 if (dump_file)
5473 fprintf (dump_file, "PRE: store updated with reaching reg ");
5474 print_rtl (dump_file, expr->reaching_reg);
5475 fprintf (dump_file, ":\n ");
5476 print_inline_rtx (dump_file, insn, 8);
5477 fprintf (dump_file, "\n");
5480 copy = gen_move_insn ( reg, copy_rtx (SET_SRC (pat)));
5481 new = emit_insn_before (copy, insn);
5482 record_one_set (REGNO (reg), new);
5483 SET_SRC (pat) = reg;
5484 df_insn_rescan (insn);
5486 /* un-recognize this pattern since it's probably different now. */
5487 INSN_CODE (insn) = -1;
5488 gcse_create_count++;
5493 /* Store motion code. */
5495 #define ANTIC_STORE_LIST(x) ((x)->loads)
5496 #define AVAIL_STORE_LIST(x) ((x)->stores)
5497 #define LAST_AVAIL_CHECK_FAILURE(x) ((x)->reaching_reg)
5499 /* This is used to communicate the target bitvector we want to use in the
5500 reg_set_info routine when called via the note_stores mechanism. */
5501 static int * regvec;
5503 /* And current insn, for the same routine. */
5504 static rtx compute_store_table_current_insn;
5506 /* Used in computing the reverse edge graph bit vectors. */
5507 static sbitmap * st_antloc;
5509 /* Global holding the number of store expressions we are dealing with. */
5510 static int num_stores;
5512 /* Check whether we need to mark a register set. Called from
5513 note_stores. */
5515 static void
5516 reg_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
5517 void *data)
5519 sbitmap bb_reg = data;
5521 if (GET_CODE (dest) == SUBREG)
5522 dest = SUBREG_REG (dest);
5524 if (REG_P (dest))
5526 regvec[REGNO (dest)] = INSN_UID (compute_store_table_current_insn);
5527 if (bb_reg)
5528 SET_BIT (bb_reg, REGNO (dest));
5532 /* Clear any mark that says that this insn sets dest. Called from
5533 note_stores. */
5535 static void
5536 reg_clear_last_set (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
5537 void *data)
5539 int *dead_vec = data;
5541 if (GET_CODE (dest) == SUBREG)
5542 dest = SUBREG_REG (dest);
5544 if (REG_P (dest) &&
5545 dead_vec[REGNO (dest)] == INSN_UID (compute_store_table_current_insn))
5546 dead_vec[REGNO (dest)] = 0;
5549 /* Return zero if some of the registers in list X are killed
5550 due to a set of one of the registers recorded in REGS_SET. */
5552 static bool
5553 store_ops_ok (const_rtx x, int *regs_set)
5555 const_rtx reg;
5557 for (; x; x = XEXP (x, 1))
5559 reg = XEXP (x, 0);
5560 if (regs_set[REGNO(reg)])
5561 return false;
5564 return true;
5567 /* Returns a list of registers mentioned in X. */
5568 static rtx
5569 extract_mentioned_regs (rtx x)
5571 return extract_mentioned_regs_helper (x, NULL_RTX);
5574 /* Helper for extract_mentioned_regs; ACCUM is used to accumulate used
5575 registers. */
5576 static rtx
5577 extract_mentioned_regs_helper (rtx x, rtx accum)
5579 int i;
5580 enum rtx_code code;
5581 const char * fmt;
5583 /* Repeat is used to turn tail-recursion into iteration. */
5584 repeat:
5586 if (x == 0)
5587 return accum;
5589 code = GET_CODE (x);
5590 switch (code)
5592 case REG:
5593 return alloc_EXPR_LIST (0, x, accum);
5595 case MEM:
5596 x = XEXP (x, 0);
5597 goto repeat;
5599 case PRE_DEC:
5600 case PRE_INC:
5601 case PRE_MODIFY:
5602 case POST_DEC:
5603 case POST_INC:
5604 case POST_MODIFY:
5605 /* We do not run this function with arguments having side effects. */
5606 gcc_unreachable ();
5608 case PC:
5609 case CC0: /*FIXME*/
5610 case CONST:
5611 case CONST_INT:
5612 case CONST_DOUBLE:
5613 case CONST_FIXED:
5614 case CONST_VECTOR:
5615 case SYMBOL_REF:
5616 case LABEL_REF:
5617 case ADDR_VEC:
5618 case ADDR_DIFF_VEC:
5619 return accum;
5621 default:
5622 break;
5625 i = GET_RTX_LENGTH (code) - 1;
5626 fmt = GET_RTX_FORMAT (code);
5628 for (; i >= 0; i--)
5630 if (fmt[i] == 'e')
5632 rtx tem = XEXP (x, i);
5634 /* If we are about to do the last recursive call
5635 needed at this level, change it into iteration. */
5636 if (i == 0)
5638 x = tem;
5639 goto repeat;
5642 accum = extract_mentioned_regs_helper (tem, accum);
5644 else if (fmt[i] == 'E')
5646 int j;
5648 for (j = 0; j < XVECLEN (x, i); j++)
5649 accum = extract_mentioned_regs_helper (XVECEXP (x, i, j), accum);
5653 return accum;
5656 /* Determine whether INSN is a MEM store pattern that we will consider moving.
5657 REGS_SET_BEFORE is bitmap of registers set before (and including) the
5658 current insn, REGS_SET_AFTER is bitmap of registers set after (and
5659 including) the insn in this basic block. We must be passing through BB from
5660 head to end, as we are using this fact to speed things up.
5662 The results are stored this way:
5664 -- the first anticipatable expression is added into ANTIC_STORE_LIST
5665 -- if the processed expression is not anticipatable, NULL_RTX is added
5666 there instead, so that we can use it as indicator that no further
5667 expression of this type may be anticipatable
5668 -- if the expression is available, it is added as head of AVAIL_STORE_LIST;
5669 consequently, all of them but this head are dead and may be deleted.
5670 -- if the expression is not available, the insn because of which it fails to be
5671 available is stored in reaching_reg.
5673 Things are complicated a bit by the fact that there may already be stores
5674 to the same MEM from other blocks; also the caller must take care of the
5675 necessary cleanup of the temporary markers after the end of the basic block.
5678 static void
5679 find_moveable_store (rtx insn, int *regs_set_before, int *regs_set_after)
5681 struct ls_expr * ptr;
5682 rtx dest, set, tmp;
5683 int check_anticipatable, check_available;
5684 basic_block bb = BLOCK_FOR_INSN (insn);
5686 set = single_set (insn);
5687 if (!set)
5688 return;
5690 dest = SET_DEST (set);
5692 if (! MEM_P (dest) || MEM_VOLATILE_P (dest)
5693 || GET_MODE (dest) == BLKmode)
5694 return;
5696 if (side_effects_p (dest))
5697 return;
5699 /* If we are handling exceptions, we must be careful with memory references
5700 that may trap. If we are not, the behavior is undefined, so we may just
5701 continue. */
5702 if (flag_non_call_exceptions && may_trap_p (dest))
5703 return;
5705 /* Even if the destination cannot trap, the source may. In this case we'd
5706 need to handle updating the REG_EH_REGION note. */
5707 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
5708 return;
5710 /* Make sure that the SET_SRC of this store insns can be assigned to
5711 a register, or we will fail later on in replace_store_insn, which
5712 assumes that we can do this. But sometimes the target machine has
5713 oddities like MEM read-modify-write instructions. See for example
5714 PR24257. */
5715 if (!can_assign_to_reg_p (SET_SRC (set)))
5716 return;
5718 ptr = ldst_entry (dest);
5719 if (!ptr->pattern_regs)
5720 ptr->pattern_regs = extract_mentioned_regs (dest);
5722 /* Do not check for anticipatability if we either found one anticipatable
5723 store already, or tested for one and found out that it was killed. */
5724 check_anticipatable = 0;
5725 if (!ANTIC_STORE_LIST (ptr))
5726 check_anticipatable = 1;
5727 else
5729 tmp = XEXP (ANTIC_STORE_LIST (ptr), 0);
5730 if (tmp != NULL_RTX
5731 && BLOCK_FOR_INSN (tmp) != bb)
5732 check_anticipatable = 1;
5734 if (check_anticipatable)
5736 if (store_killed_before (dest, ptr->pattern_regs, insn, bb, regs_set_before))
5737 tmp = NULL_RTX;
5738 else
5739 tmp = insn;
5740 ANTIC_STORE_LIST (ptr) = alloc_INSN_LIST (tmp,
5741 ANTIC_STORE_LIST (ptr));
5744 /* It is not necessary to check whether the store is available if we did
5745 it successfully before; if we failed before, do not bother to check
5746 until we reach the insn that caused us to fail. */
5747 check_available = 0;
5748 if (!AVAIL_STORE_LIST (ptr))
5749 check_available = 1;
5750 else
5752 tmp = XEXP (AVAIL_STORE_LIST (ptr), 0);
5753 if (BLOCK_FOR_INSN (tmp) != bb)
5754 check_available = 1;
5756 if (check_available)
5758 /* Check whether we have already reached the insn at which the check
5759 failed last time. */
5760 if (LAST_AVAIL_CHECK_FAILURE (ptr))
5762 for (tmp = BB_END (bb);
5763 tmp != insn && tmp != LAST_AVAIL_CHECK_FAILURE (ptr);
5764 tmp = PREV_INSN (tmp))
5765 continue;
5766 if (tmp == insn)
5767 check_available = 0;
5769 else
5770 check_available = store_killed_after (dest, ptr->pattern_regs, insn,
5771 bb, regs_set_after,
5772 &LAST_AVAIL_CHECK_FAILURE (ptr));
5774 if (!check_available)
5775 AVAIL_STORE_LIST (ptr) = alloc_INSN_LIST (insn, AVAIL_STORE_LIST (ptr));
5778 /* Find available and anticipatable stores. */
5780 static int
5781 compute_store_table (void)
5783 int ret;
5784 basic_block bb;
5785 unsigned regno;
5786 rtx insn, pat, tmp;
5787 int *last_set_in, *already_set;
5788 struct ls_expr * ptr, **prev_next_ptr_ptr;
5790 max_gcse_regno = max_reg_num ();
5792 reg_set_in_block = sbitmap_vector_alloc (last_basic_block,
5793 max_gcse_regno);
5794 sbitmap_vector_zero (reg_set_in_block, last_basic_block);
5795 pre_ldst_mems = 0;
5796 pre_ldst_table = htab_create (13, pre_ldst_expr_hash,
5797 pre_ldst_expr_eq, NULL);
5798 last_set_in = XCNEWVEC (int, max_gcse_regno);
5799 already_set = XNEWVEC (int, max_gcse_regno);
5801 /* Find all the stores we care about. */
5802 FOR_EACH_BB (bb)
5804 /* First compute the registers set in this block. */
5805 regvec = last_set_in;
5807 FOR_BB_INSNS (bb, insn)
5809 if (! INSN_P (insn))
5810 continue;
5812 if (CALL_P (insn))
5814 HARD_REG_SET clobbered_regs;
5816 get_call_invalidated_used_regs (insn, &clobbered_regs, true);
5817 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5818 if (TEST_HARD_REG_BIT (clobbered_regs, regno))
5820 last_set_in[regno] = INSN_UID (insn);
5821 SET_BIT (reg_set_in_block[bb->index], regno);
5825 pat = PATTERN (insn);
5826 compute_store_table_current_insn = insn;
5827 note_stores (pat, reg_set_info, reg_set_in_block[bb->index]);
5830 /* Now find the stores. */
5831 memset (already_set, 0, sizeof (int) * max_gcse_regno);
5832 regvec = already_set;
5833 FOR_BB_INSNS (bb, insn)
5835 if (! INSN_P (insn))
5836 continue;
5838 if (CALL_P (insn))
5840 HARD_REG_SET clobbered_regs;
5842 get_call_invalidated_used_regs (insn, &clobbered_regs, true);
5843 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5844 if (TEST_HARD_REG_BIT (clobbered_regs, regno))
5845 already_set[regno] = 1;
5848 pat = PATTERN (insn);
5849 note_stores (pat, reg_set_info, NULL);
5851 /* Now that we've marked regs, look for stores. */
5852 find_moveable_store (insn, already_set, last_set_in);
5854 /* Unmark regs that are no longer set. */
5855 compute_store_table_current_insn = insn;
5856 note_stores (pat, reg_clear_last_set, last_set_in);
5857 if (CALL_P (insn))
5859 HARD_REG_SET clobbered_regs;
5861 get_call_invalidated_used_regs (insn, &clobbered_regs, true);
5862 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5863 if (TEST_HARD_REG_BIT (clobbered_regs, regno)
5864 && last_set_in[regno] == INSN_UID (insn))
5865 last_set_in[regno] = 0;
5869 #ifdef ENABLE_CHECKING
5870 /* last_set_in should now be all-zero. */
5871 for (regno = 0; regno < max_gcse_regno; regno++)
5872 gcc_assert (!last_set_in[regno]);
5873 #endif
5875 /* Clear temporary marks. */
5876 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
5878 LAST_AVAIL_CHECK_FAILURE(ptr) = NULL_RTX;
5879 if (ANTIC_STORE_LIST (ptr)
5880 && (tmp = XEXP (ANTIC_STORE_LIST (ptr), 0)) == NULL_RTX)
5881 ANTIC_STORE_LIST (ptr) = XEXP (ANTIC_STORE_LIST (ptr), 1);
5885 /* Remove the stores that are not available anywhere, as there will
5886 be no opportunity to optimize them. */
5887 for (ptr = pre_ldst_mems, prev_next_ptr_ptr = &pre_ldst_mems;
5888 ptr != NULL;
5889 ptr = *prev_next_ptr_ptr)
5891 if (!AVAIL_STORE_LIST (ptr))
5893 *prev_next_ptr_ptr = ptr->next;
5894 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
5895 free_ldst_entry (ptr);
5897 else
5898 prev_next_ptr_ptr = &ptr->next;
5901 ret = enumerate_ldsts ();
5903 if (dump_file)
5905 fprintf (dump_file, "ST_avail and ST_antic (shown under loads..)\n");
5906 print_ldst_list (dump_file);
5909 free (last_set_in);
5910 free (already_set);
5911 return ret;
5914 /* Check to see if the load X is aliased with STORE_PATTERN.
5915 AFTER is true if we are checking the case when STORE_PATTERN occurs
5916 after X. */
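/* When the store occurs after the load, the two form an anti-dependence
   (write after read); when it occurs before, a true dependence (read after
   write).  Either way the load constrains motion of the store, hence the
   two different dependence checks below.  */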
5918 static bool
5919 load_kills_store (const_rtx x, const_rtx store_pattern, int after)
5921 if (after)
5922 return anti_dependence (x, store_pattern);
5923 else
5924 return true_dependence (store_pattern, GET_MODE (store_pattern), x,
5925 rtx_addr_varies_p);
5928 /* Go through the entire insn X, looking for any loads which might alias
5929 STORE_PATTERN. Return true if found.
5930 AFTER is true if we are checking the case when STORE_PATTERN occurs
5931 after the insn X. */
5933 static bool
5934 find_loads (const_rtx x, const_rtx store_pattern, int after)
5936 const char * fmt;
5937 int i, j;
5938 int ret = false;
5940 if (!x)
5941 return false;
5943 if (GET_CODE (x) == SET)
5944 x = SET_SRC (x);
5946 if (MEM_P (x))
5948 if (load_kills_store (x, store_pattern, after))
5949 return true;
5952 /* Recursively process the insn. */
5953 fmt = GET_RTX_FORMAT (GET_CODE (x));
5955 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0 && !ret; i--)
5957 if (fmt[i] == 'e')
5958 ret |= find_loads (XEXP (x, i), store_pattern, after);
5959 else if (fmt[i] == 'E')
5960 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5961 ret |= find_loads (XVECEXP (x, i, j), store_pattern, after);
5963 return ret;
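/* Helper for store_killed_in_insn: return true if the body PAT (a SET, or
   one element of a PARALLEL) kills the store pattern X, either by writing
   to memory that may alias X or by containing a load that aliases it.
   AFTER has the same meaning as in find_loads.  */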
5966 static inline bool
5967 store_killed_in_pat (const_rtx x, const_rtx pat, int after)
5969 if (GET_CODE (pat) == SET)
5971 rtx dest = SET_DEST (pat);
5973 if (GET_CODE (dest) == ZERO_EXTRACT)
5974 dest = XEXP (dest, 0);
5976 /* Check for memory stores to aliased objects. */
5977 if (MEM_P (dest)
5978 && !expr_equiv_p (dest, x))
5980 if (after)
5982 if (output_dependence (dest, x))
5983 return true;
5985 else
5987 if (output_dependence (x, dest))
5988 return true;
5993 if (find_loads (pat, x, after))
5994 return true;
5996 return false;
5999 /* Check if INSN kills the store pattern X (is aliased with it).
6000 AFTER is true if we are checking the case when store X occurs
6001 after the insn. Return true if it does. */
6003 static bool
6004 store_killed_in_insn (const_rtx x, const_rtx x_regs, const_rtx insn, int after)
6006 const_rtx reg, base, note, pat;
6008 if (!INSN_P (insn))
6009 return false;
6011 if (CALL_P (insn))
6013 /* A normal or pure call might read from pattern,
6014 but a const call will not. */
6015 if (!RTL_CONST_CALL_P (insn))
6016 return true;
6018 /* But even a const call reads its parameters. Check whether the
6019 base of some of the registers used in the mem is the stack pointer. */
6020 for (reg = x_regs; reg; reg = XEXP (reg, 1))
6022 base = find_base_term (XEXP (reg, 0));
6023 if (!base
6024 || (GET_CODE (base) == ADDRESS
6025 && GET_MODE (base) == Pmode
6026 && XEXP (base, 0) == stack_pointer_rtx))
6027 return true;
6030 return false;
6033 pat = PATTERN (insn);
6034 if (GET_CODE (pat) == SET)
6036 if (store_killed_in_pat (x, pat, after))
6037 return true;
6039 else if (GET_CODE (pat) == PARALLEL)
6041 int i;
6043 for (i = 0; i < XVECLEN (pat, 0); i++)
6044 if (store_killed_in_pat (x, XVECEXP (pat, 0, i), after))
6045 return true;
6047 else if (find_loads (PATTERN (insn), x, after))
6048 return true;
6050 /* If this insn has a REG_EQUAL or REG_EQUIV note referencing a memory
6051 location aliased with X, then this insn kills X. */
6052 note = find_reg_equal_equiv_note (insn);
6053 if (! note)
6054 return false;
6055 note = XEXP (note, 0);
6057 /* However, if the note represents a must alias rather than a may
6058 alias relationship, then it does not kill X. */
6059 if (expr_equiv_p (note, x))
6060 return false;
6062 /* See if there are any aliased loads in the note. */
6063 return find_loads (note, x, after);
6066 /* Returns true if the expression X is loaded or clobbered on or after INSN
6067 within basic block BB. REGS_SET_AFTER is a bitmap of registers set in
6068 or after the insn. X_REGS is the list of registers mentioned in X. If the
6069 store is killed, return in FAIL_INSN the last insn in which that happens. */
6071 static bool
6072 store_killed_after (const_rtx x, const_rtx x_regs, const_rtx insn, const_basic_block bb,
6073 int *regs_set_after, rtx *fail_insn)
6075 rtx last = BB_END (bb), act;
6077 if (!store_ops_ok (x_regs, regs_set_after))
6079 /* We do not know where it will happen. */
6080 if (fail_insn)
6081 *fail_insn = NULL_RTX;
6082 return true;
6085 /* Scan from the end, so that fail_insn is determined correctly. */
6086 for (act = last; act != PREV_INSN (insn); act = PREV_INSN (act))
6087 if (store_killed_in_insn (x, x_regs, act, false))
6089 if (fail_insn)
6090 *fail_insn = act;
6091 return true;
6094 return false;
6097 /* Returns true if the expression X is loaded or clobbered on or before INSN
6098 within basic block BB. X_REGS is the list of registers mentioned in X.
6099 REGS_SET_BEFORE is a bitmap of registers set before or in this insn. */
6100 static bool
6101 store_killed_before (const_rtx x, const_rtx x_regs, const_rtx insn, const_basic_block bb,
6102 int *regs_set_before)
6104 rtx first = BB_HEAD (bb);
6106 if (!store_ops_ok (x_regs, regs_set_before))
6107 return true;
6109 for ( ; insn != PREV_INSN (first); insn = PREV_INSN (insn))
6110 if (store_killed_in_insn (x, x_regs, insn, true))
6111 return true;
6113 return false;
6116 /* Fill in the available, anticipatable, transparent and kill vectors
6117 (ae_gen, st_antloc, transp, ae_kill), based on the lists of available and anticipatable stores. */
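/* In the LCM formulation used here: for each store expression and basic
   block, ae_gen is set when an occurrence of the store reaches the end of
   the block, st_antloc when one is anticipatable at the start of the block,
   ae_kill when the block kills the expression, and transp when nothing in
   the block interferes with it.  */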
6118 static void
6119 build_store_vectors (void)
6121 basic_block bb;
6122 int *regs_set_in_block;
6123 rtx insn, st;
6124 struct ls_expr * ptr;
6125 unsigned regno;
6127 /* Build the gen_vector. This is any store in the table which is not killed
6128 by aliasing later in its block. */
6129 ae_gen = sbitmap_vector_alloc (last_basic_block, num_stores);
6130 sbitmap_vector_zero (ae_gen, last_basic_block);
6132 st_antloc = sbitmap_vector_alloc (last_basic_block, num_stores);
6133 sbitmap_vector_zero (st_antloc, last_basic_block);
6135 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
6137 for (st = AVAIL_STORE_LIST (ptr); st != NULL; st = XEXP (st, 1))
6139 insn = XEXP (st, 0);
6140 bb = BLOCK_FOR_INSN (insn);
6142 /* If we've already seen an available expression in this block,
6143 we can delete this one (it occurs earlier in the block). We'll
6144 copy the SRC expression to an unused register in case there
6145 are any side effects. */
6146 if (TEST_BIT (ae_gen[bb->index], ptr->index))
6148 rtx r = gen_reg_rtx_and_attrs (ptr->pattern);
6149 if (dump_file)
6150 fprintf (dump_file, "Removing redundant store:\n");
6151 replace_store_insn (r, XEXP (st, 0), bb, ptr);
6152 continue;
6154 SET_BIT (ae_gen[bb->index], ptr->index);
6157 for (st = ANTIC_STORE_LIST (ptr); st != NULL; st = XEXP (st, 1))
6159 insn = XEXP (st, 0);
6160 bb = BLOCK_FOR_INSN (insn);
6161 SET_BIT (st_antloc[bb->index], ptr->index);
6165 ae_kill = sbitmap_vector_alloc (last_basic_block, num_stores);
6166 sbitmap_vector_zero (ae_kill, last_basic_block);
6168 transp = sbitmap_vector_alloc (last_basic_block, num_stores);
6169 sbitmap_vector_zero (transp, last_basic_block);
6170 regs_set_in_block = XNEWVEC (int, max_gcse_regno);
6172 FOR_EACH_BB (bb)
6174 for (regno = 0; regno < max_gcse_regno; regno++)
6175 regs_set_in_block[regno] = TEST_BIT (reg_set_in_block[bb->index], regno);
6177 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
6179 if (store_killed_after (ptr->pattern, ptr->pattern_regs, BB_HEAD (bb),
6180 bb, regs_set_in_block, NULL))
6182 /* It should not be necessary to consider the expression
6183 killed if it is both anticipatable and available. */
6184 if (!TEST_BIT (st_antloc[bb->index], ptr->index)
6185 || !TEST_BIT (ae_gen[bb->index], ptr->index))
6186 SET_BIT (ae_kill[bb->index], ptr->index);
6188 else
6189 SET_BIT (transp[bb->index], ptr->index);
6193 free (regs_set_in_block);
6195 if (dump_file)
6197 dump_sbitmap_vector (dump_file, "st_antloc", "", st_antloc, last_basic_block);
6198 dump_sbitmap_vector (dump_file, "st_kill", "", ae_kill, last_basic_block);
6199 dump_sbitmap_vector (dump_file, "Transpt", "", transp, last_basic_block);
6200 dump_sbitmap_vector (dump_file, "st_avloc", "", ae_gen, last_basic_block);
6204 /* Insert an instruction at the beginning of a basic block, and update
6205 the BB_HEAD if needed. */
6207 static void
6208 insert_insn_start_basic_block (rtx insn, basic_block bb)
6210 /* Insert at start of successor block. */
6211 rtx prev = PREV_INSN (BB_HEAD (bb));
6212 rtx before = BB_HEAD (bb);
6213 while (before != 0)
6215 if (! LABEL_P (before)
6216 && !NOTE_INSN_BASIC_BLOCK_P (before))
6217 break;
6218 prev = before;
6219 if (prev == BB_END (bb))
6220 break;
6221 before = NEXT_INSN (before);
6224 insn = emit_insn_after_noloc (insn, prev, bb);
6226 if (dump_file)
6228 fprintf (dump_file, "STORE_MOTION insert store at start of BB %d:\n",
6229 bb->index);
6230 print_inline_rtx (dump_file, insn, 6);
6231 fprintf (dump_file, "\n");
6235 /* This routine will insert a store on an edge. EXPR is the ldst entry for
6236 the memory reference, and E is the edge to insert it on. Returns nonzero
6237 if an edge insertion was performed. */
6239 static int
6240 insert_store (struct ls_expr * expr, edge e)
6242 rtx reg, insn;
6243 basic_block bb;
6244 edge tmp;
6245 edge_iterator ei;
6247 /* We did all the deletes before this insert, so if we didn't delete a
6248 store, then we haven't set the reaching reg yet either. */
6249 if (expr->reaching_reg == NULL_RTX)
6250 return 0;
6252 if (e->flags & EDGE_FAKE)
6253 return 0;
6255 reg = expr->reaching_reg;
6256 insn = gen_move_insn (copy_rtx (expr->pattern), reg);
6258 /* If we are inserting this expression on ALL predecessor edges of a BB,
6259 insert it at the start of the BB instead, and reset the insert bits on
6260 those edges so we don't try to insert it on them individually. */
6261 bb = e->dest;
6262 FOR_EACH_EDGE (tmp, ei, e->dest->preds)
6263 if (!(tmp->flags & EDGE_FAKE))
6265 int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
6267 gcc_assert (index != EDGE_INDEX_NO_EDGE);
6268 if (! TEST_BIT (pre_insert_map[index], expr->index))
6269 break;
6272 /* If tmp is NULL, we found an insertion on every edge, blank the
6273 insertion vector for these edges, and insert at the start of the BB. */
6274 if (!tmp && bb != EXIT_BLOCK_PTR)
6276 FOR_EACH_EDGE (tmp, ei, e->dest->preds)
6278 int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
6279 RESET_BIT (pre_insert_map[index], expr->index);
6281 insert_insn_start_basic_block (insn, bb);
6282 return 0;
6285 /* We can't put stores in the front of blocks pointed to by abnormal
6286 edges since that may put a store where one didn't use to be. */
6287 gcc_assert (!(e->flags & EDGE_ABNORMAL));
6289 insert_insn_on_edge (insn, e);
6291 if (dump_file)
6293 fprintf (dump_file, "STORE_MOTION insert insn on edge (%d, %d):\n",
6294 e->src->index, e->dest->index);
6295 print_inline_rtx (dump_file, insn, 6);
6296 fprintf (dump_file, "\n");
6299 return 1;
6302 /* Remove any REG_EQUAL or REG_EQUIV notes containing a reference to the
6303 memory location in SMEXPR set in basic block BB.
6305 This could be rather expensive. */
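/* The loop below is an iterative depth-first walk over the blocks reachable
   from BB, driven by an explicit stack of edge iterators and a VISITED
   bitmap.  In a block where the store is anticipatable, only the insns
   before that store are scanned, since notes past it refer to the value
   written by that store.  */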
6307 static void
6308 remove_reachable_equiv_notes (basic_block bb, struct ls_expr *smexpr)
6310 edge_iterator *stack, ei;
6311 int sp;
6312 edge act;
6313 sbitmap visited = sbitmap_alloc (last_basic_block);
6314 rtx last, insn, note;
6315 rtx mem = smexpr->pattern;
6317 stack = XNEWVEC (edge_iterator, n_basic_blocks);
6318 sp = 0;
6319 ei = ei_start (bb->succs);
6321 sbitmap_zero (visited);
6323 act = (EDGE_COUNT (ei_container (ei)) > 0 ? EDGE_I (ei_container (ei), 0) : NULL);
6324 while (1)
6326 if (!act)
6328 if (!sp)
6330 free (stack);
6331 sbitmap_free (visited);
6332 return;
6334 act = ei_edge (stack[--sp]);
6336 bb = act->dest;
6338 if (bb == EXIT_BLOCK_PTR
6339 || TEST_BIT (visited, bb->index))
6341 if (!ei_end_p (ei))
6342 ei_next (&ei);
6343 act = (! ei_end_p (ei)) ? ei_edge (ei) : NULL;
6344 continue;
6346 SET_BIT (visited, bb->index);
6348 if (TEST_BIT (st_antloc[bb->index], smexpr->index))
6350 for (last = ANTIC_STORE_LIST (smexpr);
6351 BLOCK_FOR_INSN (XEXP (last, 0)) != bb;
6352 last = XEXP (last, 1))
6353 continue;
6354 last = XEXP (last, 0);
6356 else
6357 last = NEXT_INSN (BB_END (bb));
6359 for (insn = BB_HEAD (bb); insn != last; insn = NEXT_INSN (insn))
6360 if (INSN_P (insn))
6362 note = find_reg_equal_equiv_note (insn);
6363 if (!note || !expr_equiv_p (XEXP (note, 0), mem))
6364 continue;
6366 if (dump_file)
6367 fprintf (dump_file, "STORE_MOTION drop REG_EQUAL note at insn %d:\n",
6368 INSN_UID (insn));
6369 remove_note (insn, note);
6372 if (!ei_end_p (ei))
6373 ei_next (&ei);
6374 act = (! ei_end_p (ei)) ? ei_edge (ei) : NULL;
6376 if (EDGE_COUNT (bb->succs) > 0)
6378 if (act)
6379 stack[sp++] = ei;
6380 ei = ei_start (bb->succs);
6381 act = (EDGE_COUNT (ei_container (ei)) > 0 ? EDGE_I (ei_container (ei), 0) : NULL);
6386 /* This routine will replace a store with a SET to a specified register. */
6388 static void
6389 replace_store_insn (rtx reg, rtx del, basic_block bb, struct ls_expr *smexpr)
6391 rtx insn, mem, note, set, ptr, pair;
6393 mem = smexpr->pattern;
6394 insn = gen_move_insn (reg, SET_SRC (single_set (del)));
6396 for (ptr = ANTIC_STORE_LIST (smexpr); ptr; ptr = XEXP (ptr, 1))
6397 if (XEXP (ptr, 0) == del)
6399 XEXP (ptr, 0) = insn;
6400 break;
6403 /* Move the notes from the deleted insn to its replacement, and patch
6404 up the LIBCALL notes. */
6405 REG_NOTES (insn) = REG_NOTES (del);
6407 note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
6408 if (note)
6410 pair = XEXP (note, 0);
6411 note = find_reg_note (pair, REG_LIBCALL, NULL_RTX);
6412 XEXP (note, 0) = insn;
6414 note = find_reg_note (insn, REG_LIBCALL, NULL_RTX);
6415 if (note)
6417 pair = XEXP (note, 0);
6418 note = find_reg_note (pair, REG_RETVAL, NULL_RTX);
6419 XEXP (note, 0) = insn;
6422 /* Emit the insn AFTER all the notes are transferred.
6423 This is cheaper since we avoid df rescanning for the note change. */
6424 insn = emit_insn_after (insn, del);
6426 if (dump_file)
6428 fprintf (dump_file,
6429 "STORE_MOTION delete insn in BB %d:\n ", bb->index);
6430 print_inline_rtx (dump_file, del, 6);
6431 fprintf (dump_file, "\nSTORE MOTION replaced with insn:\n ");
6432 print_inline_rtx (dump_file, insn, 6);
6433 fprintf (dump_file, "\n");
6436 delete_insn (del);
6438 /* Now we must handle REG_EQUAL notes whose contents are equal to the mem;
6439 they are no longer accurate if they are reached by this
6440 definition, so drop them. */
6441 for (; insn != NEXT_INSN (BB_END (bb)); insn = NEXT_INSN (insn))
6442 if (INSN_P (insn))
6444 set = single_set (insn);
6445 if (!set)
6446 continue;
6447 if (expr_equiv_p (SET_DEST (set), mem))
6448 return;
6449 note = find_reg_equal_equiv_note (insn);
6450 if (!note || !expr_equiv_p (XEXP (note, 0), mem))
6451 continue;
6453 if (dump_file)
6454 fprintf (dump_file, "STORE_MOTION drop REG_EQUAL note at insn %d:\n",
6455 INSN_UID (insn));
6456 remove_note (insn, note);
6458 remove_reachable_equiv_notes (bb, smexpr);
6462 /* Delete a store, but copy the value that would have been stored into
6463 the reaching_reg for later storing. */
6465 static void
6466 delete_store (struct ls_expr * expr, basic_block bb)
6468 rtx reg, i, del;
6470 if (expr->reaching_reg == NULL_RTX)
6471 expr->reaching_reg = gen_reg_rtx_and_attrs (expr->pattern);
6473 reg = expr->reaching_reg;
6475 for (i = AVAIL_STORE_LIST (expr); i; i = XEXP (i, 1))
6477 del = XEXP (i, 0);
6478 if (BLOCK_FOR_INSN (del) == bb)
6480 /* We know there is only one since we deleted redundant
6481 ones during the available computation. */
6482 replace_store_insn (reg, del, bb, expr);
6483 break;
6488 /* Free memory used by store motion. */
6490 static void
6491 free_store_memory (void)
6493 free_ldst_mems ();
6495 if (ae_gen)
6496 sbitmap_vector_free (ae_gen);
6497 if (ae_kill)
6498 sbitmap_vector_free (ae_kill);
6499 if (transp)
6500 sbitmap_vector_free (transp);
6501 if (st_antloc)
6502 sbitmap_vector_free (st_antloc);
6503 if (pre_insert_map)
6504 sbitmap_vector_free (pre_insert_map);
6505 if (pre_delete_map)
6506 sbitmap_vector_free (pre_delete_map);
6507 if (reg_set_in_block)
6508 sbitmap_vector_free (reg_set_in_block);
6510 ae_gen = ae_kill = transp = st_antloc = NULL;
6511 pre_insert_map = pre_delete_map = reg_set_in_block = NULL;
6514 /* Perform store motion. Much like gcse, except we move expressions the
6515 other way by looking at the flowgraph in reverse. */
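/* An illustrative sketch of the transformation (hypothetical code, not
   taken from any testcase):

       while (cond)                     while (cond)
         {                                {
           ...                              ...
           *p = x;         becomes           r = x;
         }                                }
                                          *p = r;

   Each store chosen for motion is replaced by a copy of its source into a
   new pseudo (the expression's reaching_reg); pre_edge_rev_lcm then tells
   us on which edges to insert the single real store.  */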
6517 static void
6518 store_motion (void)
6520 basic_block bb;
6521 int x;
6522 struct ls_expr * ptr;
6523 int update_flow = 0;
6525 if (dump_file)
6527 fprintf (dump_file, "before store motion\n");
6528 print_rtl (dump_file, get_insns ());
6531 init_alias_analysis ();
6533 /* Find all the available and anticipatable stores. */
6534 num_stores = compute_store_table ();
6535 if (num_stores == 0)
6537 htab_delete (pre_ldst_table);
6538 pre_ldst_table = NULL;
6539 sbitmap_vector_free (reg_set_in_block);
6540 end_alias_analysis ();
6541 return;
6544 /* Now compute kill & transp vectors. */
6545 build_store_vectors ();
6546 add_noreturn_fake_exit_edges ();
6547 connect_infinite_loops_to_exit ();
6549 edge_list = pre_edge_rev_lcm (num_stores, transp, ae_gen,
6550 st_antloc, ae_kill, &pre_insert_map,
6551 &pre_delete_map);
6553 /* Now we want to insert the new stores which are going to be needed. */
6554 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
6556 /* If any of the edges we have above are abnormal, we can't move this
6557 store. */
6558 for (x = NUM_EDGES (edge_list) - 1; x >= 0; x--)
6559 if (TEST_BIT (pre_insert_map[x], ptr->index)
6560 && (INDEX_EDGE (edge_list, x)->flags & EDGE_ABNORMAL))
6561 break;
6563 if (x >= 0)
6565 if (dump_file != NULL)
6566 fprintf (dump_file,
6567 "Can't replace store %d: abnormal edge from %d to %d\n",
6568 ptr->index, INDEX_EDGE (edge_list, x)->src->index,
6569 INDEX_EDGE (edge_list, x)->dest->index);
6570 continue;
6573 /* Delete the redundant stores, then insert the new ones on the edges computed by LCM. */
6575 FOR_EACH_BB (bb)
6576 if (TEST_BIT (pre_delete_map[bb->index], ptr->index))
6577 delete_store (ptr, bb);
6579 for (x = 0; x < NUM_EDGES (edge_list); x++)
6580 if (TEST_BIT (pre_insert_map[x], ptr->index))
6581 update_flow |= insert_store (ptr, INDEX_EDGE (edge_list, x));
6584 if (update_flow)
6585 commit_edge_insertions ();
6587 free_store_memory ();
6588 free_edge_list (edge_list);
6589 remove_fake_exit_edges ();
6590 end_alias_analysis ();
6594 /* Entry point for jump bypassing optimization pass. */
6596 static int
6597 bypass_jumps (void)
6599 int changed;
6601 /* We do not construct an accurate cfg in functions which call
6602 setjmp, so just punt to be safe. */
6603 if (cfun->calls_setjmp)
6604 return 0;
6606 /* Identify the basic block information for this function, including
6607 successors and predecessors. */
6608 max_gcse_regno = max_reg_num ();
6610 if (dump_file)
6611 dump_flow_info (dump_file, dump_flags);
6613 /* Return if there's nothing to do, or it is too expensive. */
6614 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
6615 || is_too_expensive (_ ("jump bypassing disabled")))
6616 return 0;
6618 gcc_obstack_init (&gcse_obstack);
6619 bytes_used = 0;
6621 /* We need alias. */
6622 init_alias_analysis ();
6624 /* Record where pseudo-registers are set. This data is kept accurate
6625 during each pass. ??? We could also record hard-reg information here
6626 [since it's unchanging], however it is currently done during hash table
6627 computation.
6629 It may be tempting to compute MEM set information here too, but MEM sets
6630 will be subject to code motion one day and thus we need to compute
6631 information about memory sets when we build the hash tables. */
6633 alloc_reg_set_mem (max_gcse_regno);
6634 compute_sets ();
6636 max_gcse_regno = max_reg_num ();
6637 alloc_gcse_mem ();
6638 changed = one_cprop_pass (MAX_GCSE_PASSES + 2, true, true);
6639 free_gcse_mem ();
6641 if (dump_file)
6643 fprintf (dump_file, "BYPASS of %s: %d basic blocks, ",
6644 current_function_name (), n_basic_blocks);
6645 fprintf (dump_file, "%d bytes\n\n", bytes_used);
6648 obstack_free (&gcse_obstack, NULL);
6649 free_reg_set_mem ();
6651 /* We are finished with alias. */
6652 end_alias_analysis ();
6654 return changed;
6657 /* Return true if the graph is too expensive to optimize. PASS is the
6658 optimization about to be performed. */
6660 static bool
6661 is_too_expensive (const char *pass)
6663 /* Trying to perform global optimizations on flow graphs which have
6664 a high connectivity will take a long time and is unlikely to be
6665 particularly useful.
6667 In normal circumstances a cfg should have about twice as many
6668 edges as blocks. But we do not want to punish small functions
6669 which have a couple of switch statements. Rather than simply
6670 thresholding the number of blocks, use something with a more
6671 graceful degradation. */
6672 if (n_edges > 20000 + n_basic_blocks * 4)
6674 warning (OPT_Wdisabled_optimization,
6675 "%s: %d basic blocks and %d edges/basic block",
6676 pass, n_basic_blocks, n_edges / n_basic_blocks);
6678 return true;
6681 /* If allocating memory for the cprop bitmap would take up too much
6682 storage it's better just to disable the optimization. */
6683 if ((n_basic_blocks
6684 * SBITMAP_SET_SIZE (max_reg_num ())
6685 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
6687 warning (OPT_Wdisabled_optimization,
6688 "%s: %d basic blocks and %d registers",
6689 pass, n_basic_blocks, max_reg_num ());
6691 return true;
6694 return false;
6697 static bool
6698 gate_handle_jump_bypass (void)
6700 return optimize > 0 && flag_gcse
6701 && dbg_cnt (jump_bypass);
6704 /* Perform jump bypassing and control flow optimizations. */
6705 static unsigned int
6706 rest_of_handle_jump_bypass (void)
6708 delete_unreachable_blocks ();
6709 if (bypass_jumps ())
6711 delete_trivially_dead_insns (get_insns (), max_reg_num ());
6712 rebuild_jump_labels (get_insns ());
6713 cleanup_cfg (0);
6715 return 0;
6718 struct rtl_opt_pass pass_jump_bypass =
6721 RTL_PASS,
6722 "bypass", /* name */
6723 gate_handle_jump_bypass, /* gate */
6724 rest_of_handle_jump_bypass, /* execute */
6725 NULL, /* sub */
6726 NULL, /* next */
6727 0, /* static_pass_number */
6728 TV_BYPASS, /* tv_id */
6729 0, /* properties_required */
6730 0, /* properties_provided */
6731 0, /* properties_destroyed */
6732 0, /* todo_flags_start */
6733 TODO_dump_func |
6734 TODO_ggc_collect | TODO_verify_flow /* todo_flags_finish */
6739 static bool
6740 gate_handle_gcse (void)
6742 return optimize > 0 && flag_gcse
6743 && dbg_cnt (gcse);
6747 static unsigned int
6748 rest_of_handle_gcse (void)
6750 int save_csb, save_cfj;
6751 int tem2 = 0, tem;
6752 tem = gcse_main (get_insns ());
6753 delete_trivially_dead_insns (get_insns (), max_reg_num ());
6754 rebuild_jump_labels (get_insns ());
6755 save_csb = flag_cse_skip_blocks;
6756 save_cfj = flag_cse_follow_jumps;
6757 flag_cse_skip_blocks = flag_cse_follow_jumps = 0;
6759 /* If -fexpensive-optimizations, re-run CSE to clean up things done
6760 by gcse. */
6761 if (flag_expensive_optimizations)
6763 timevar_push (TV_CSE);
6764 tem2 = cse_main (get_insns (), max_reg_num ());
6765 df_finish_pass (false);
6766 purge_all_dead_edges ();
6767 delete_trivially_dead_insns (get_insns (), max_reg_num ());
6768 timevar_pop (TV_CSE);
6769 cse_not_expected = !flag_rerun_cse_after_loop;
6772 /* If gcse or cse altered any jumps, rerun jump optimizations to clean
6773 things up. */
6774 if (tem || tem2 == 2)
6776 timevar_push (TV_JUMP);
6777 rebuild_jump_labels (get_insns ());
6778 cleanup_cfg (0);
6779 timevar_pop (TV_JUMP);
6781 else if (tem2 == 1)
6782 cleanup_cfg (0);
6784 flag_cse_skip_blocks = save_csb;
6785 flag_cse_follow_jumps = save_cfj;
6786 return 0;
6789 struct rtl_opt_pass pass_gcse =
6792 RTL_PASS,
6793 "gcse1", /* name */
6794 gate_handle_gcse, /* gate */
6795 rest_of_handle_gcse, /* execute */
6796 NULL, /* sub */
6797 NULL, /* next */
6798 0, /* static_pass_number */
6799 TV_GCSE, /* tv_id */
6800 0, /* properties_required */
6801 0, /* properties_provided */
6802 0, /* properties_destroyed */
6803 0, /* todo_flags_start */
6804 TODO_df_finish | TODO_verify_rtl_sharing |
6805 TODO_dump_func |
6806 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
6811 #include "gt-gcse.h"