1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
23 /* TODO
24 - reordering of memory allocation and freeing to be more space efficient
25 - do rough calc of how many regs are needed in each block, and a rough
26 calc of how many regs are available in each class and use that to
27 throttle back the code in cases where RTX_COST is minimal.
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
33 recomputing it for each pass
37 /* References searched while implementing this.
39 Compilers Principles, Techniques and Tools
40 Aho, Sethi, Ullman
41 Addison-Wesley, 1988
43 Global Optimization by Suppression of Partial Redundancies
44 E. Morel, C. Renvoise
45    Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
47 A Portable Machine-Independent Global Optimizer - Design and Measurements
48 Frederick Chow
49 Stanford Ph.D. thesis, Dec. 1983
51 A Fast Algorithm for Code Movement Optimization
52 D.M. Dhamdhere
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
60 Practical Adaptation of the Global Optimization
61 Algorithm of Morel and Renvoise
62 D.M. Dhamdhere
63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
65 Efficiently Computing Static Single Assignment Form and the Control
66 Dependence Graph
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
70 Lazy Code Motion
71 J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
75 Time for Reducible Flow Control
76 Thomas Ball
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
109 Global code motion / global value numbering
110 C. Click
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
113 Value Driven Redundancy Elimination
114 L.T. Simpson
115 Rice University Ph.D. thesis, Apr. 1996
117 Value Numbering
118 L.T. Simpson
119 Massively Scalar Compiler Project, Rice University, Sep. 1996
121 High Performance Compilers for Parallel Computing
122 Michael Wolfe
123 Addison-Wesley, 1996
125 Advanced Compiler Design and Implementation
126 Steven Muchnick
127 Morgan Kaufmann, 1997
129 Building an Optimizing Compiler
130 Robert Morgan
131 Digital Press, 1998
133 People wishing to speed up the code here should read:
134 Elimination Algorithms for Data Flow Analysis
135 B.G. Ryder, M.C. Paull
136 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
138 How to Analyze Large Programs Efficiently and Informatively
139 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
140 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
142 People wishing to do something different can find various possibilities
143 in the above papers and elsewhere.
146 #include "config.h"
147 #include "system.h"
148 #include "coretypes.h"
149 #include "tm.h"
150 #include "toplev.h"
152 #include "rtl.h"
153 #include "tm_p.h"
154 #include "regs.h"
155 #include "hard-reg-set.h"
156 #include "flags.h"
157 #include "real.h"
158 #include "insn-config.h"
159 #include "recog.h"
160 #include "basic-block.h"
161 #include "output.h"
162 #include "function.h"
163 #include "expr.h"
164 #include "except.h"
165 #include "ggc.h"
166 #include "params.h"
167 #include "cselib.h"
169 #include "obstack.h"
171 /* Propagate flow information through back edges and thus enable PRE to
172    move loop-invariant calculations out of loops.
174 Originally this tended to create worse overall code, but several
175 improvements during the development of PRE seem to have made following
176 back edges generally a win.
178 Note much of the loop invariant code motion done here would normally
179 be done by loop.c, which has more heuristics for when to move invariants
180 out of loops. At some point we might need to move some of those
181 heuristics into gcse.c. */
183 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
184 are a superset of those done by GCSE.
186 We perform the following steps:
188 1) Compute basic block information.
190 2) Compute table of places where registers are set.
192 3) Perform copy/constant propagation.
194 4) Perform global cse.
196 5) Perform another pass of copy/constant propagation.
198 Two passes of copy/constant propagation are done because the first one
199 enables more GCSE and the second one helps to clean up the copies that
200 GCSE creates. This is needed more for PRE than for Classic because Classic
201 GCSE will try to use an existing register containing the common
202 subexpression rather than create a new one. This is harder to do for PRE
203 because of the code motion (which Classic GCSE doesn't do).
205 Expressions we are interested in GCSE-ing are of the form
206 (set (pseudo-reg) (expression)).
207 Function want_to_gcse_p says what these are.
209 PRE handles moving invariant expressions out of loops (by treating them as
210 partially redundant).
212 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
213 assignment) based GVN (global value numbering). L. T. Simpson's paper
214 (Rice University) on value numbering is a useful reference for this.
216 **********************
218 We used to support multiple passes but there are diminishing returns in
219 doing so. The first pass usually makes 90% of the changes that are doable.
220 A second pass can make a few more changes made possible by the first pass.
221 Experiments show any further passes don't make enough changes to justify
222 the expense.
224 A study of spec92 using an unlimited number of passes:
225 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
226 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
227 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
229    It was found that doing copy propagation between each pass enables further
230    substitutions.
232    PRE is quite expensive in complicated functions because the DFA can take
233    a while to converge.  Hence we only perform one pass.  The parameter
234    max-gcse-passes can be modified if one wants to experiment.
236 **********************
238 The steps for PRE are:
240 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
242 2) Perform the data flow analysis for PRE.
244 3) Delete the redundant instructions
246 4) Insert the required copies [if any] that make the partially
247 redundant instructions fully redundant.
249 5) For other reaching expressions, insert an instruction to copy the value
250 to a newly created pseudo that will reach the redundant instruction.
252 The deletion is done first so that when we do insertions we
253 know which pseudo reg to use.
255 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
256 argue it is not. The number of iterations for the algorithm to converge
257 is typically 2-4 so I don't view it as that expensive (relatively speaking).
259 PRE GCSE depends heavily on the second CSE pass to clean up the copies
260 we create. To make an expression reach the place where it's redundant,
261 the result of the expression is copied to a new register, and the redundant
262 expression is deleted by replacing it with this new register. Classic GCSE
263 doesn't have this problem as much as it computes the reaching defs of
264 each register in each block and thus can try to use an existing register.
266 **********************
268    A fair bit of simplicity is gained by writing small functions for simple
269    tasks, even when a function is only called in one place.  This may
270    measurably slow things down [or may not] by creating more function call
271    overhead than is necessary.  The source is laid out so that it's trivial
272    to make the affected functions inline, so that one can measure what speedup,
273    if any, can be achieved, and maybe later, when things settle down, things
274    can be rearranged.
276 Help stamp out big monolithic functions! */
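/* Purely illustrative sketch, not part of this file: at the source level,
   PRE turns a partially redundant computation such as

       if (cond)
         x = a + b;        -- a+b computed on one path only
       ...
       y = a + b;          -- partially redundant use

   into a fully redundant one by inserting the computation on the path
   where it was missing and recording the value in a new pseudo:

       if (cond)
         { t = a + b; x = t; }
       else
         t = a + b;        -- copy inserted so a+b reaches y on every path
       ...
       y = t;              -- redundant computation deleted and replaced

   The new pseudo plays the role of `reaching_reg' below.  */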
278 /* GCSE global vars. */
280 /* -dG dump file. */
281 static FILE *gcse_file;
283 /* Note whether or not we should run jump optimization after gcse. We
284 want to do this for two cases.
286 * If we changed any jumps via cprop.
288 * If we added any labels via edge splitting. */
290 static int run_jump_opt_after_gcse;
292 /* Bitmaps are normally not included in debugging dumps.
293 However it's useful to be able to print them from GDB.
294 We could create special functions for this, but it's simpler to
295 just allow passing stderr to the dump_foo fns. Since stderr can
296 be a macro, we store a copy here. */
297 static FILE *debug_stderr;
299 /* An obstack for our working variables. */
300 static struct obstack gcse_obstack;
302 /* Nonzero for each mode that supports (set (reg) (reg)).
303 This is trivially true for integer and floating point values.
304 It may or may not be true for condition codes. */
305 static char can_copy_p[(int) NUM_MACHINE_MODES];
307 /* Nonzero if can_copy_p has been initialized. */
308 static int can_copy_init_p;
310 struct reg_use {rtx reg_rtx; };
312 /* Hash table of expressions. */
314 struct expr
316 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
317 rtx expr;
318 /* Index in the available expression bitmaps. */
319 int bitmap_index;
320 /* Next entry with the same hash. */
321 struct expr *next_same_hash;
322 /* List of anticipatable occurrences in basic blocks in the function.
323 An "anticipatable occurrence" is one that is the first occurrence in the
324 basic block, the operands are not modified in the basic block prior
325 to the occurrence and the output is not used between the start of
326 the block and the occurrence. */
327 struct occr *antic_occr;
328   /* List of available occurrences in basic blocks in the function.
329 An "available occurrence" is one that is the last occurrence in the
330 basic block and the operands are not modified by following statements in
331 the basic block [including this insn]. */
332 struct occr *avail_occr;
333 /* Non-null if the computation is PRE redundant.
334 The value is the newly created pseudo-reg to record a copy of the
335 expression in all the places that reach the redundant copy. */
336 rtx reaching_reg;
339 /* Occurrence of an expression.
340 There is one per basic block. If a pattern appears more than once the
341 last appearance is used [or first for anticipatable expressions]. */
343 struct occr
345 /* Next occurrence of this expression. */
346 struct occr *next;
347 /* The insn that computes the expression. */
348 rtx insn;
349 /* Nonzero if this [anticipatable] occurrence has been deleted. */
350 char deleted_p;
351 /* Nonzero if this [available] occurrence has been copied to
352 reaching_reg. */
353 /* ??? This is mutually exclusive with deleted_p, so they could share
354 the same byte. */
355 char copied_p;
358 /* Expression and copy propagation hash tables.
359 Each hash table is an array of buckets.
360 ??? It is known that if it were an array of entries, structure elements
361 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
362 not clear whether in the final analysis a sufficient amount of memory would
363 be saved as the size of the available expression bitmaps would be larger
364 [one could build a mapping table without holes afterwards though].
365 Someday I'll perform the computation and figure it out. */
367 struct hash_table
369 /* The table itself.
370 This is an array of `expr_hash_table_size' elements. */
371 struct expr **table;
373 /* Size of the hash table, in elements. */
374 unsigned int size;
376 /* Number of hash table elements. */
377 unsigned int n_elems;
379   /* Whether the table is the expression hash table or the copy propagation one.  */
380 int set_p;
383 /* Expression hash table. */
384 static struct hash_table expr_hash_table;
386 /* Copy propagation hash table. */
387 static struct hash_table set_hash_table;
389 /* Mapping of uids to cuids.
390 Only real insns get cuids. */
391 static int *uid_cuid;
393 /* Highest UID in UID_CUID. */
394 static int max_uid;
396 /* Get the cuid of an insn. */
397 #ifdef ENABLE_CHECKING
398 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
399 #else
400 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
401 #endif
403 /* Number of cuids. */
404 static int max_cuid;
406 /* Mapping of cuids to insns. */
407 static rtx *cuid_insn;
409 /* Get insn from cuid. */
410 #define CUID_INSN(CUID) (cuid_insn[CUID])
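/* Illustrative example (made-up UIDs, not from a real dump): given the
   insn stream

       UID 12  NOTE          -- not a real insn
       UID 15  INSN  set r100 <- (plus r101 r102)
       UID 19  CODE_LABEL    -- not a real insn
       UID 23  INSN  set r103 <- r100

   alloc_gcse_mem below assigns uid_cuid[15] == 0 and uid_cuid[23] == 1,
   while the note and the label get the cuid of the next real insn, and
   cuid_insn[0] / cuid_insn[1] map back to the insns with UID 15 and 23.
   Because cuids are dense they can index per-insn bitmaps such as the
   reaching-definition sets declared further down.  */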
412 /* Maximum register number in function prior to doing gcse + 1.
413 Registers created during this pass have regno >= max_gcse_regno.
414 This is named with "gcse" to not collide with global of same name. */
415 static unsigned int max_gcse_regno;
417 /* Table of registers that are modified.
419 For each register, each element is a list of places where the pseudo-reg
420 is set.
422 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
423 requires knowledge of which blocks kill which regs [and thus could use
424 a bitmap instead of the lists `reg_set_table' uses].
426    `reg_set_table' could be turned into an array of bitmaps (num-bbs x
427    num-regs) [however, perhaps it is useful to keep the data as is].  One
428 advantage of recording things this way is that `reg_set_table' is fairly
429 sparse with respect to pseudo regs but for hard regs could be fairly dense
430 [relatively speaking]. And recording sets of pseudo-regs in lists speeds
431 up functions like compute_transp since in the case of pseudo-regs we only
432 need to iterate over the number of times a pseudo-reg is set, not over the
433 number of basic blocks [clearly there is a bit of a slow down in the cases
434 where a pseudo is set more than once in a block, however it is believed
435 that the net effect is to speed things up]. This isn't done for hard-regs
436 because recording call-clobbered hard-regs in `reg_set_table' at each
437 function call can consume a fair bit of memory, and iterating over
438 hard-regs stored this way in compute_transp will be more expensive. */
440 typedef struct reg_set
442 /* The next setting of this register. */
443 struct reg_set *next;
444 /* The insn where it was set. */
445 rtx insn;
446 } reg_set;
448 static reg_set **reg_set_table;
450 /* Size of `reg_set_table'.
451 The table starts out at max_gcse_regno + slop, and is enlarged as
452 necessary. */
453 static int reg_set_table_size;
455 /* Amount to grow `reg_set_table' by when it's full. */
456 #define REG_SET_TABLE_SLOP 100
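/* Usage sketch (illustration only, assuming a pseudo-reg rtx X and basic
   block BB): to visit every place a pseudo is set, one walks the
   per-register list, roughly the way compute_transp is described above
   as doing:

       reg_set *r;
       for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
         if (BLOCK_NUM (r->insn) == bb->index)
           ...  the pseudo is set somewhere in BB  ...

   so the cost is proportional to the number of sets of that pseudo rather
   than to the number of basic blocks.  */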
458 /* This is a list of expressions which are MEMs and will be used by load
459 or store motion.
460    Load motion tracks MEMs which aren't killed by
461    anything except themselves (i.e., loads and stores to a single location).
462    We can then allow movement of these MEM refs with a little special
463    allowance (all stores copy the same value to the reaching reg used
464    for the loads).  This means all values used to store into memory must have
465 no side effects so we can re-issue the setter value.
466 Store Motion uses this structure as an expression table to track stores
467 which look interesting, and might be moveable towards the exit block. */
469 struct ls_expr
471 struct expr * expr; /* Gcse expression reference for LM. */
472 rtx pattern; /* Pattern of this mem. */
473 rtx loads; /* INSN list of loads seen. */
474 rtx stores; /* INSN list of stores seen. */
475 struct ls_expr * next; /* Next in the list. */
476 int invalid; /* Invalid for some reason. */
477 int index; /* If it maps to a bitmap index. */
478 int hash_index; /* Index when in a hash table. */
479 rtx reaching_reg; /* Register to use when re-writing. */
482 /* Array of implicit set patterns indexed by basic block index. */
483 static rtx *implicit_sets;
485 /* Head of the list of load/store memory refs. */
486 static struct ls_expr * pre_ldst_mems = NULL;
488 /* Bitmap containing one bit for each register in the program.
489 Used when performing GCSE to track which registers have been set since
490 the start of the basic block. */
491 static regset reg_set_bitmap;
493 /* For each block, a bitmap of registers set in the block.
494 This is used by expr_killed_p and compute_transp.
495 It is computed during hash table computation and not by compute_sets
496 as it includes registers added since the last pass (or between cprop and
497 gcse) and it's currently not easy to realloc sbitmap vectors. */
498 static sbitmap *reg_set_in_block;
500 /* Array, indexed by basic block number for a list of insns which modify
501 memory within that block. */
502 static rtx * modify_mem_list;
503 bitmap modify_mem_list_set;
505 /* This array parallels modify_mem_list, but is kept canonicalized. */
506 static rtx * canon_modify_mem_list;
507 bitmap canon_modify_mem_list_set;
508 /* Various variables for statistics gathering. */
510 /* Memory used in a pass.
511 This isn't intended to be absolutely precise. Its intent is only
512 to keep an eye on memory usage. */
513 static int bytes_used;
515 /* GCSE substitutions made. */
516 static int gcse_subst_count;
517 /* Number of copy instructions created. */
518 static int gcse_create_count;
519 /* Number of constants propagated. */
520 static int const_prop_count;
521 /* Number of copies propagated.  */
522 static int copy_prop_count;
524 /* These variables are used by classic GCSE.
525 Normally they'd be defined a bit later, but `rd_gen' needs to
526 be declared sooner. */
528 /* Each block has a bitmap of each type.
529    The length of each block's bitmap is:
531        max_cuid  - for reaching definitions
532        n_exprs   - for available expressions
534    Thus we view the bitmaps as two-dimensional arrays, i.e.
535 rd_kill[block_num][cuid_num]
536 ae_kill[block_num][expr_num] */
538 /* For reaching defs */
539 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
541 /* for available exprs */
542 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
544 /* Objects of this type are passed around by the null-pointer check
545 removal routines. */
546 struct null_pointer_info
548 /* The basic block being processed. */
549 basic_block current_block;
550 /* The first register to be handled in this pass. */
551 unsigned int min_reg;
552 /* One greater than the last register to be handled in this pass. */
553 unsigned int max_reg;
554 sbitmap *nonnull_local;
555 sbitmap *nonnull_killed;
558 static void compute_can_copy PARAMS ((void));
559 static char *gmalloc PARAMS ((unsigned int));
560 static char *grealloc PARAMS ((char *, unsigned int));
561 static char *gcse_alloc PARAMS ((unsigned long));
562 static void alloc_gcse_mem PARAMS ((rtx));
563 static void free_gcse_mem PARAMS ((void));
564 static void alloc_reg_set_mem PARAMS ((int));
565 static void free_reg_set_mem PARAMS ((void));
566 static int get_bitmap_width PARAMS ((int, int, int));
567 static void record_one_set PARAMS ((int, rtx));
568 static void record_set_info PARAMS ((rtx, rtx, void *));
569 static void compute_sets PARAMS ((rtx));
570 static void hash_scan_insn PARAMS ((rtx, struct hash_table *, int));
571 static void hash_scan_set PARAMS ((rtx, rtx, struct hash_table *));
572 static void hash_scan_clobber PARAMS ((rtx, rtx, struct hash_table *));
573 static void hash_scan_call PARAMS ((rtx, rtx, struct hash_table *));
574 static int want_to_gcse_p PARAMS ((rtx));
575 static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
576 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
577 static int oprs_available_p PARAMS ((rtx, rtx));
578 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
579 int, int, struct hash_table *));
580 static void insert_set_in_table PARAMS ((rtx, rtx, struct hash_table *));
581 static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
582 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
583 static unsigned int hash_string_1 PARAMS ((const char *));
584 static unsigned int hash_set PARAMS ((int, int));
585 static int expr_equiv_p PARAMS ((rtx, rtx));
586 static void record_last_reg_set_info PARAMS ((rtx, int));
587 static void record_last_mem_set_info PARAMS ((rtx));
588 static void record_last_set_info PARAMS ((rtx, rtx, void *));
589 static void compute_hash_table PARAMS ((struct hash_table *));
590 static void alloc_hash_table PARAMS ((int, struct hash_table *, int));
591 static void free_hash_table PARAMS ((struct hash_table *));
592 static void compute_hash_table_work PARAMS ((struct hash_table *));
593 static void dump_hash_table PARAMS ((FILE *, const char *,
594 struct hash_table *));
595 static struct expr *lookup_expr PARAMS ((rtx, struct hash_table *));
596 static struct expr *lookup_set PARAMS ((unsigned int, rtx, struct hash_table *));
597 static struct expr *next_set PARAMS ((unsigned int, struct expr *));
598 static void reset_opr_set_tables PARAMS ((void));
599 static int oprs_not_set_p PARAMS ((rtx, rtx));
600 static void mark_call PARAMS ((rtx));
601 static void mark_set PARAMS ((rtx, rtx));
602 static void mark_clobber PARAMS ((rtx, rtx));
603 static void mark_oprs_set PARAMS ((rtx));
604 static void alloc_cprop_mem PARAMS ((int, int));
605 static void free_cprop_mem PARAMS ((void));
606 static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
607 static void compute_transpout PARAMS ((void));
608 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
609 struct hash_table *));
610 static void compute_cprop_data PARAMS ((void));
611 static void find_used_regs PARAMS ((rtx *, void *));
612 static int try_replace_reg PARAMS ((rtx, rtx, rtx));
613 static struct expr *find_avail_set PARAMS ((int, rtx));
614 static int cprop_jump PARAMS ((basic_block, rtx, rtx, rtx, rtx));
615 static void mems_conflict_for_gcse_p PARAMS ((rtx, rtx, void *));
616 static int load_killed_in_block_p PARAMS ((basic_block, int, rtx, int));
617 static void canon_list_insert PARAMS ((rtx, rtx, void *));
618 static int cprop_insn PARAMS ((rtx, int));
619 static int cprop PARAMS ((int));
620 static rtx fis_get_condition PARAMS ((rtx));
621 static void find_implicit_sets PARAMS ((void));
622 static int one_cprop_pass PARAMS ((int, int, int));
623 static bool constprop_register PARAMS ((rtx, rtx, rtx, int));
624 static struct expr *find_bypass_set PARAMS ((int, int));
625 static int bypass_block PARAMS ((basic_block, rtx, rtx));
626 static int bypass_conditional_jumps PARAMS ((void));
627 static void alloc_pre_mem PARAMS ((int, int));
628 static void free_pre_mem PARAMS ((void));
629 static void compute_pre_data PARAMS ((void));
630 static int pre_expr_reaches_here_p PARAMS ((basic_block, struct expr *,
631 basic_block));
632 static void insert_insn_end_bb PARAMS ((struct expr *, basic_block, int));
633 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
634 static void pre_insert_copies PARAMS ((void));
635 static int pre_delete PARAMS ((void));
636 static int pre_gcse PARAMS ((void));
637 static int one_pre_gcse_pass PARAMS ((int));
638 static void add_label_notes PARAMS ((rtx, rtx));
639 static void alloc_code_hoist_mem PARAMS ((int, int));
640 static void free_code_hoist_mem PARAMS ((void));
641 static void compute_code_hoist_vbeinout PARAMS ((void));
642 static void compute_code_hoist_data PARAMS ((void));
643 static int hoist_expr_reaches_here_p PARAMS ((basic_block, int, basic_block,
644 char *));
645 static void hoist_code PARAMS ((void));
646 static int one_code_hoisting_pass PARAMS ((void));
647 static void alloc_rd_mem PARAMS ((int, int));
648 static void free_rd_mem PARAMS ((void));
649 static void handle_rd_kill_set PARAMS ((rtx, int, basic_block));
650 static void compute_kill_rd PARAMS ((void));
651 static void compute_rd PARAMS ((void));
652 static void alloc_avail_expr_mem PARAMS ((int, int));
653 static void free_avail_expr_mem PARAMS ((void));
654 static void compute_ae_gen PARAMS ((struct hash_table *));
655 static int expr_killed_p PARAMS ((rtx, basic_block));
656 static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *, struct hash_table *));
657 static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
658 basic_block, int));
659 static rtx computing_insn PARAMS ((struct expr *, rtx));
660 static int def_reaches_here_p PARAMS ((rtx, rtx));
661 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
662 static int handle_avail_expr PARAMS ((rtx, struct expr *));
663 static int classic_gcse PARAMS ((void));
664 static int one_classic_gcse_pass PARAMS ((int));
665 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
666 static int delete_null_pointer_checks_1 PARAMS ((unsigned int *,
667 sbitmap *, sbitmap *,
668 struct null_pointer_info *));
669 static rtx process_insert_insn PARAMS ((struct expr *));
670 static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
671 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
672 basic_block, int, char *));
673 static int pre_expr_reaches_here_p_work PARAMS ((basic_block, struct expr *,
674 basic_block, char *));
675 static struct ls_expr * ldst_entry PARAMS ((rtx));
676 static void free_ldst_entry PARAMS ((struct ls_expr *));
677 static void free_ldst_mems PARAMS ((void));
678 static void print_ldst_list PARAMS ((FILE *));
679 static struct ls_expr * find_rtx_in_ldst PARAMS ((rtx));
680 static int enumerate_ldsts PARAMS ((void));
681 static inline struct ls_expr * first_ls_expr PARAMS ((void));
682 static inline struct ls_expr * next_ls_expr PARAMS ((struct ls_expr *));
683 static int simple_mem PARAMS ((rtx));
684 static void invalidate_any_buried_refs PARAMS ((rtx));
685 static void compute_ld_motion_mems PARAMS ((void));
686 static void trim_ld_motion_mems PARAMS ((void));
687 static void update_ld_motion_stores PARAMS ((struct expr *));
688 static void reg_set_info PARAMS ((rtx, rtx, void *));
689 static int store_ops_ok PARAMS ((rtx, basic_block));
690 static void find_moveable_store PARAMS ((rtx));
691 static int compute_store_table PARAMS ((void));
692 static int load_kills_store PARAMS ((rtx, rtx));
693 static int find_loads PARAMS ((rtx, rtx));
694 static int store_killed_in_insn PARAMS ((rtx, rtx));
695 static int store_killed_after PARAMS ((rtx, rtx, basic_block));
696 static int store_killed_before PARAMS ((rtx, rtx, basic_block));
697 static void build_store_vectors PARAMS ((void));
698 static void insert_insn_start_bb PARAMS ((rtx, basic_block));
699 static int insert_store PARAMS ((struct ls_expr *, edge));
700 static void replace_store_insn PARAMS ((rtx, rtx, basic_block));
701 static void delete_store PARAMS ((struct ls_expr *,
702 basic_block));
703 static void free_store_memory PARAMS ((void));
704 static void store_motion PARAMS ((void));
705 static void free_insn_expr_list_list PARAMS ((rtx *));
706 static void clear_modify_mem_tables PARAMS ((void));
707 static void free_modify_mem_tables PARAMS ((void));
708 static rtx gcse_emit_move_after PARAMS ((rtx, rtx, rtx));
709 static void local_cprop_find_used_regs PARAMS ((rtx *, void *));
710 static bool do_local_cprop PARAMS ((rtx, rtx, int, rtx*));
711 static bool adjust_libcall_notes PARAMS ((rtx, rtx, rtx, rtx*));
712 static void local_cprop_pass PARAMS ((int));
714 /* Entry point for global common subexpression elimination.
715 F is the first instruction in the function. */
718 gcse_main (f, file)
719 rtx f;
720 FILE *file;
722 int changed, pass;
723 /* Bytes used at start of pass. */
724 int initial_bytes_used;
725 /* Maximum number of bytes used by a pass. */
726 int max_pass_bytes;
727 /* Point to release obstack data from for each pass. */
728 char *gcse_obstack_bottom;
730 /* We do not construct an accurate cfg in functions which call
731 setjmp, so just punt to be safe. */
732 if (current_function_calls_setjmp)
733 return 0;
735 /* Assume that we do not need to run jump optimizations after gcse. */
736 run_jump_opt_after_gcse = 0;
738 /* For calling dump_foo fns from gdb. */
739 debug_stderr = stderr;
740 gcse_file = file;
742 /* Identify the basic block information for this function, including
743 successors and predecessors. */
744 max_gcse_regno = max_reg_num ();
746 if (file)
747 dump_flow_info (file);
749 /* Return if there's nothing to do. */
750 if (n_basic_blocks <= 1)
751 return 0;
753 /* Trying to perform global optimizations on flow graphs which have
754 a high connectivity will take a long time and is unlikely to be
755 particularly useful.
757 In normal circumstances a cfg should have about twice as many edges
758 as blocks. But we do not want to punish small functions which have
759 a couple switch statements. So we require a relatively large number
760 of basic blocks and the ratio of edges to blocks to be high. */
761 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
763 if (warn_disabled_optimization)
764 warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
765 n_basic_blocks, n_edges / n_basic_blocks);
766 return 0;
769 /* If allocating memory for the cprop bitmap would take up too much
770 storage it's better just to disable the optimization. */
771 if ((n_basic_blocks
772 * SBITMAP_SET_SIZE (max_gcse_regno)
773 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
775 if (warn_disabled_optimization)
776 warning ("GCSE disabled: %d basic blocks and %d registers",
777 n_basic_blocks, max_gcse_regno);
779 return 0;
782 /* See what modes support reg/reg copy operations. */
783 if (! can_copy_init_p)
785 compute_can_copy ();
786 can_copy_init_p = 1;
789 gcc_obstack_init (&gcse_obstack);
790 bytes_used = 0;
792 /* We need alias. */
793 init_alias_analysis ();
794 /* Record where pseudo-registers are set. This data is kept accurate
795 during each pass. ??? We could also record hard-reg information here
796 [since it's unchanging], however it is currently done during hash table
797 computation.
799 It may be tempting to compute MEM set information here too, but MEM sets
800 will be subject to code motion one day and thus we need to compute
801 information about memory sets when we build the hash tables. */
803 alloc_reg_set_mem (max_gcse_regno);
804 compute_sets (f);
806 pass = 0;
807 initial_bytes_used = bytes_used;
808 max_pass_bytes = 0;
809 gcse_obstack_bottom = gcse_alloc (1);
810 changed = 1;
811 while (changed && pass < MAX_GCSE_PASSES)
813 changed = 0;
814 if (file)
815 fprintf (file, "GCSE pass %d\n\n", pass + 1);
817 /* Initialize bytes_used to the space for the pred/succ lists,
818 and the reg_set_table data. */
819 bytes_used = initial_bytes_used;
821 /* Each pass may create new registers, so recalculate each time. */
822 max_gcse_regno = max_reg_num ();
824 alloc_gcse_mem (f);
826 /* Don't allow constant propagation to modify jumps
827 during this pass. */
828 changed = one_cprop_pass (pass + 1, 0, 0);
830 if (optimize_size)
831 changed |= one_classic_gcse_pass (pass + 1);
832 else
834 changed |= one_pre_gcse_pass (pass + 1);
835 /* We may have just created new basic blocks. Release and
836 recompute various things which are sized on the number of
837 basic blocks. */
838 if (changed)
840 free_modify_mem_tables ();
841 modify_mem_list
842 = (rtx *) gmalloc (last_basic_block * sizeof (rtx));
843 canon_modify_mem_list
844 = (rtx *) gmalloc (last_basic_block * sizeof (rtx));
845 memset ((char *) modify_mem_list, 0, last_basic_block * sizeof (rtx));
846 memset ((char *) canon_modify_mem_list, 0, last_basic_block * sizeof (rtx));
848 free_reg_set_mem ();
849 alloc_reg_set_mem (max_reg_num ());
850 compute_sets (f);
851 run_jump_opt_after_gcse = 1;
854 if (max_pass_bytes < bytes_used)
855 max_pass_bytes = bytes_used;
857 /* Free up memory, then reallocate for code hoisting. We can
858 not re-use the existing allocated memory because the tables
859 will not have info for the insns or registers created by
860 partial redundancy elimination. */
861 free_gcse_mem ();
863   /* It does not make sense to run code hoisting unless we are optimizing
864 for code size -- it rarely makes programs faster, and can make
865 them bigger if we did partial redundancy elimination (when optimizing
866 for space, we use a classic gcse algorithm instead of partial
867 redundancy algorithms). */
868 if (optimize_size)
870 max_gcse_regno = max_reg_num ();
871 alloc_gcse_mem (f);
872 changed |= one_code_hoisting_pass ();
873 free_gcse_mem ();
875 if (max_pass_bytes < bytes_used)
876 max_pass_bytes = bytes_used;
879 if (file)
881 fprintf (file, "\n");
882 fflush (file);
885 obstack_free (&gcse_obstack, gcse_obstack_bottom);
886 pass++;
889 /* Do one last pass of copy propagation, including cprop into
890 conditional jumps. */
892 max_gcse_regno = max_reg_num ();
893 alloc_gcse_mem (f);
894 /* This time, go ahead and allow cprop to alter jumps. */
895 one_cprop_pass (pass + 1, 1, 0);
896 free_gcse_mem ();
898 if (file)
900 fprintf (file, "GCSE of %s: %d basic blocks, ",
901 current_function_name, n_basic_blocks);
902 fprintf (file, "%d pass%s, %d bytes\n\n",
903 pass, pass > 1 ? "es" : "", max_pass_bytes);
906 obstack_free (&gcse_obstack, NULL);
907 free_reg_set_mem ();
908 /* We are finished with alias. */
909 end_alias_analysis ();
910 allocate_reg_info (max_reg_num (), FALSE, FALSE);
912 /* Store motion disabled until it is fixed. */
913 if (0 && !optimize_size && flag_gcse_sm)
914 store_motion ();
915 /* Record where pseudo-registers are set. */
916 return run_jump_opt_after_gcse;
919 /* Misc. utilities. */
921 /* Compute which modes support reg/reg copy operations. */
923 static void
924 compute_can_copy ()
926 int i;
927 #ifndef AVOID_CCMODE_COPIES
928 rtx reg, insn;
929 #endif
930 memset (can_copy_p, 0, NUM_MACHINE_MODES);
932 start_sequence ();
933 for (i = 0; i < NUM_MACHINE_MODES; i++)
934 if (GET_MODE_CLASS (i) == MODE_CC)
936 #ifdef AVOID_CCMODE_COPIES
937 can_copy_p[i] = 0;
938 #else
939 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
940 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
941 if (recog (PATTERN (insn), insn, NULL) >= 0)
942 can_copy_p[i] = 1;
943 #endif
945 else
946 can_copy_p[i] = 1;
948 end_sequence ();
951 /* Cover function to xmalloc to record bytes allocated. */
953 static char *
954 gmalloc (size)
955 unsigned int size;
957 bytes_used += size;
958 return xmalloc (size);
961 /* Cover function to xrealloc.
962 We don't record the additional size since we don't know it.
963 It won't affect memory usage stats much anyway. */
965 static char *
966 grealloc (ptr, size)
967 char *ptr;
968 unsigned int size;
970 return xrealloc (ptr, size);
973 /* Cover function to obstack_alloc. */
975 static char *
976 gcse_alloc (size)
977 unsigned long size;
979 bytes_used += size;
980 return (char *) obstack_alloc (&gcse_obstack, size);
983 /* Allocate memory for the cuid mapping array,
984 and reg/memory set tracking tables.
986 This is called at the start of each pass. */
988 static void
989 alloc_gcse_mem (f)
990 rtx f;
992 int i, n;
993 rtx insn;
995 /* Find the largest UID and create a mapping from UIDs to CUIDs.
996 CUIDs are like UIDs except they increase monotonically, have no gaps,
997 and only apply to real insns. */
999 max_uid = get_max_uid ();
1000 n = (max_uid + 1) * sizeof (int);
1001 uid_cuid = (int *) gmalloc (n);
1002 memset ((char *) uid_cuid, 0, n);
1003 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
1005 if (INSN_P (insn))
1006 uid_cuid[INSN_UID (insn)] = i++;
1007 else
1008 uid_cuid[INSN_UID (insn)] = i;
1011 /* Create a table mapping cuids to insns. */
1013 max_cuid = i;
1014 n = (max_cuid + 1) * sizeof (rtx);
1015 cuid_insn = (rtx *) gmalloc (n);
1016 memset ((char *) cuid_insn, 0, n);
1017 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
1018 if (INSN_P (insn))
1019 CUID_INSN (i++) = insn;
1021 /* Allocate vars to track sets of regs. */
1022 reg_set_bitmap = BITMAP_XMALLOC ();
1024 /* Allocate vars to track sets of regs, memory per block. */
1025 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (last_basic_block,
1026 max_gcse_regno);
1027 /* Allocate array to keep a list of insns which modify memory in each
1028 basic block. */
1029 modify_mem_list = (rtx *) gmalloc (last_basic_block * sizeof (rtx));
1030 canon_modify_mem_list = (rtx *) gmalloc (last_basic_block * sizeof (rtx));
1031 memset ((char *) modify_mem_list, 0, last_basic_block * sizeof (rtx));
1032 memset ((char *) canon_modify_mem_list, 0, last_basic_block * sizeof (rtx));
1033 modify_mem_list_set = BITMAP_XMALLOC ();
1034 canon_modify_mem_list_set = BITMAP_XMALLOC ();
1037 /* Free memory allocated by alloc_gcse_mem. */
1039 static void
1040 free_gcse_mem ()
1042 free (uid_cuid);
1043 free (cuid_insn);
1045 BITMAP_XFREE (reg_set_bitmap);
1047 sbitmap_vector_free (reg_set_in_block);
1048 free_modify_mem_tables ();
1049 BITMAP_XFREE (modify_mem_list_set);
1050 BITMAP_XFREE (canon_modify_mem_list_set);
1053 /* Many of the global optimization algorithms work by solving dataflow
1054 equations for various expressions. Initially, some local value is
1055 computed for each expression in each block. Then, the values across the
1056 various blocks are combined (by following flow graph edges) to arrive at
1057 global values. Conceptually, each set of equations is independent. We
1058 may therefore solve all the equations in parallel, solve them one at a
1059 time, or pick any intermediate approach.
1061 When you're going to need N two-dimensional bitmaps, each X (say, the
1062 number of blocks) by Y (say, the number of expressions), call this
1063    function.  It's not important what X and Y represent; only that Y
1064    corresponds to the things that can be done in parallel.  This function will
1065 return an appropriate chunking factor C; you should solve C sets of
1066 equations in parallel. By going through this function, we can easily
1067 trade space against time; by solving fewer equations in parallel we use
1068 less space. */
1070 static int
1071 get_bitmap_width (n, x, y)
1072 int n;
1073 int x;
1074 int y;
1076 /* It's not really worth figuring out *exactly* how much memory will
1077 be used by a particular choice. The important thing is to get
1078 something approximately right. */
1079 size_t max_bitmap_memory = 10 * 1024 * 1024;
1081 /* The number of bytes we'd use for a single column of minimum
1082 width. */
1083 size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
1085 /* Often, it's reasonable just to solve all the equations in
1086 parallel. */
1087 if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
1088 return y;
1090 /* Otherwise, pick the largest width we can, without going over the
1091 limit. */
1092 return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
1093 / column_size);
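/* Worked example (numbers invented for illustration, assuming 8-byte
   SBITMAP_ELT_TYPE, i.e. SBITMAP_ELT_BITS == 64): with n == 2 bitmap kinds
   and x == 5000 blocks, column_size is 2 * 5000 * 8 == 80000 bytes.  For
   y == 10000 expressions SBITMAP_SET_SIZE (y) is ceil (10000 / 64) == 157
   words, so solving everything at once would need about 12.5MB, over the
   10MB cap above.  The fallback returns 64 * ceil (10485760 / 80000)
   == 64 * 132 == 8448, i.e. solve about 8448 expressions per chunk.  */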
1096 /* Compute the local properties of each recorded expression.
1098 Local properties are those that are defined by the block, irrespective of
1099 other blocks.
1101 An expression is transparent in a block if its operands are not modified
1102 in the block.
1104    An expression is computed (locally available) in a block if it is computed
1105    at least once and the expression would contain the same value if the
1106    computation were moved to the end of the block.
1108    An expression is locally anticipatable in a block if it is computed at
1109    least once and the expression would contain the same value if the
1110    computation were moved to the beginning of the block.
1112 We call this routine for cprop, pre and code hoisting. They all compute
1113 basically the same information and thus can easily share this code.
1115 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1116 properties. If NULL, then it is not necessary to compute or record that
1117 particular property.
1119    TABLE controls which hash table to look at.  If it is the set hash table,
1120    TRANSP is additionally computed as ~TRANSP, since that is really cprop's
1121    ABSALTERED.  */
1123 static void
1124 compute_local_properties (transp, comp, antloc, table)
1125 sbitmap *transp;
1126 sbitmap *comp;
1127 sbitmap *antloc;
1128 struct hash_table *table;
1130 unsigned int i;
1132 /* Initialize any bitmaps that were passed in. */
1133 if (transp)
1135 if (table->set_p)
1136 sbitmap_vector_zero (transp, last_basic_block);
1137 else
1138 sbitmap_vector_ones (transp, last_basic_block);
1141 if (comp)
1142 sbitmap_vector_zero (comp, last_basic_block);
1143 if (antloc)
1144 sbitmap_vector_zero (antloc, last_basic_block);
1146 for (i = 0; i < table->size; i++)
1148 struct expr *expr;
1150 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1152 int indx = expr->bitmap_index;
1153 struct occr *occr;
1155 /* The expression is transparent in this block if it is not killed.
1156 We start by assuming all are transparent [none are killed], and
1157 then reset the bits for those that are. */
1158 if (transp)
1159 compute_transp (expr->expr, indx, transp, table->set_p);
1161 /* The occurrences recorded in antic_occr are exactly those that
1162 we want to set to nonzero in ANTLOC. */
1163 if (antloc)
1164 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1166 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1168 /* While we're scanning the table, this is a good place to
1169 initialize this. */
1170 occr->deleted_p = 0;
1173 /* The occurrences recorded in avail_occr are exactly those that
1174 we want to set to nonzero in COMP. */
1175 if (comp)
1176 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1178 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1180 /* While we're scanning the table, this is a good place to
1181 initialize this. */
1182 occr->copied_p = 0;
1185 /* While we're scanning the table, this is a good place to
1186 initialize this. */
1187 expr->reaching_reg = 0;
1192 /* Register set information.
1194 `reg_set_table' records where each register is set or otherwise
1195 modified. */
1197 static struct obstack reg_set_obstack;
1199 static void
1200 alloc_reg_set_mem (n_regs)
1201 int n_regs;
1203 unsigned int n;
1205 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1206 n = reg_set_table_size * sizeof (struct reg_set *);
1207 reg_set_table = (struct reg_set **) gmalloc (n);
1208 memset ((char *) reg_set_table, 0, n);
1210 gcc_obstack_init (&reg_set_obstack);
1213 static void
1214 free_reg_set_mem ()
1216 free (reg_set_table);
1217 obstack_free (&reg_set_obstack, NULL);
1220 /* Record REGNO in the reg_set table. */
1222 static void
1223 record_one_set (regno, insn)
1224 int regno;
1225 rtx insn;
1227 /* Allocate a new reg_set element and link it onto the list. */
1228 struct reg_set *new_reg_info;
1230 /* If the table isn't big enough, enlarge it. */
1231 if (regno >= reg_set_table_size)
1233 int new_size = regno + REG_SET_TABLE_SLOP;
1235 reg_set_table
1236 = (struct reg_set **) grealloc ((char *) reg_set_table,
1237 new_size * sizeof (struct reg_set *));
1238 memset ((char *) (reg_set_table + reg_set_table_size), 0,
1239 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1240 reg_set_table_size = new_size;
1243 new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1244 sizeof (struct reg_set));
1245 bytes_used += sizeof (struct reg_set);
1246 new_reg_info->insn = insn;
1247 new_reg_info->next = reg_set_table[regno];
1248 reg_set_table[regno] = new_reg_info;
1251 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1252 an insn. The DATA is really the instruction in which the SET is
1253 occurring. */
1255 static void
1256 record_set_info (dest, setter, data)
1257 rtx dest, setter ATTRIBUTE_UNUSED;
1258 void *data;
1260 rtx record_set_insn = (rtx) data;
1262 if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1263 record_one_set (REGNO (dest), record_set_insn);
1266 /* Scan the function and record each set of each pseudo-register.
1268 This is called once, at the start of the gcse pass. See the comments for
1269 `reg_set_table' for further documentation. */
1271 static void
1272 compute_sets (f)
1273 rtx f;
1275 rtx insn;
1277 for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1278 if (INSN_P (insn))
1279 note_stores (PATTERN (insn), record_set_info, insn);
1282 /* Hash table support. */
1284 struct reg_avail_info
1286 basic_block last_bb;
1287 int first_set;
1288 int last_set;
1291 static struct reg_avail_info *reg_avail_info;
1292 static basic_block current_bb;
1295 /* See whether X, the source of a set, is something we want to consider for
1296 GCSE. */
1298 static GTY(()) rtx test_insn;
1299 static int
1300 want_to_gcse_p (x)
1301 rtx x;
1303 int num_clobbers = 0;
1304 int icode;
1306 switch (GET_CODE (x))
1308 case REG:
1309 case SUBREG:
1310 case CONST_INT:
1311 case CONST_DOUBLE:
1312 case CONST_VECTOR:
1313 case CALL:
1314 case CONSTANT_P_RTX:
1315 return 0;
1317 default:
1318 break;
1321 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
1322 if (general_operand (x, GET_MODE (x)))
1323 return 1;
1324 else if (GET_MODE (x) == VOIDmode)
1325 return 0;
1327 /* Otherwise, check if we can make a valid insn from it. First initialize
1328 our test insn if we haven't already. */
1329 if (test_insn == 0)
1331 test_insn
1332 = make_insn_raw (gen_rtx_SET (VOIDmode,
1333 gen_rtx_REG (word_mode,
1334 FIRST_PSEUDO_REGISTER * 2),
1335 const0_rtx));
1336 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
1339 /* Now make an insn like the one we would make when GCSE'ing and see if
1340 valid. */
1341 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
1342 SET_SRC (PATTERN (test_insn)) = x;
1343 return ((icode = recog (PATTERN (test_insn), test_insn, &num_clobbers)) >= 0
1344 && (num_clobbers == 0 || ! added_clobbers_hard_reg_p (icode)));
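/* Informal examples (not exhaustive): a bare (reg) or (const_int 4) source
   is rejected by the switch above, since copying it to another pseudo gains
   nothing.  A source such as (plus:SI (reg:SI 100) (const_int 8)) is not a
   general_operand, so it takes the recog path; on most targets the test SET
   recognizes as a valid add without extra hard-register clobbers, and the
   expression is accepted.  */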
1347 /* Return nonzero if the operands of expression X are unchanged from the
1348 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1349 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1351 static int
1352 oprs_unchanged_p (x, insn, avail_p)
1353 rtx x, insn;
1354 int avail_p;
1356 int i, j;
1357 enum rtx_code code;
1358 const char *fmt;
1360 if (x == 0)
1361 return 1;
1363 code = GET_CODE (x);
1364 switch (code)
1366 case REG:
1368 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
1370 if (info->last_bb != current_bb)
1371 return 1;
1372 if (avail_p)
1373 return info->last_set < INSN_CUID (insn);
1374 else
1375 return info->first_set >= INSN_CUID (insn);
1378 case MEM:
1379 if (load_killed_in_block_p (current_bb, INSN_CUID (insn),
1380 x, avail_p))
1381 return 0;
1382 else
1383 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1385 case PRE_DEC:
1386 case PRE_INC:
1387 case POST_DEC:
1388 case POST_INC:
1389 case PRE_MODIFY:
1390 case POST_MODIFY:
1391 return 0;
1393 case PC:
1394 case CC0: /*FIXME*/
1395 case CONST:
1396 case CONST_INT:
1397 case CONST_DOUBLE:
1398 case CONST_VECTOR:
1399 case SYMBOL_REF:
1400 case LABEL_REF:
1401 case ADDR_VEC:
1402 case ADDR_DIFF_VEC:
1403 return 1;
1405 default:
1406 break;
1409 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1411 if (fmt[i] == 'e')
1413 /* If we are about to do the last recursive call needed at this
1414 level, change it into iteration. This function is called enough
1415 to be worth it. */
1416 if (i == 0)
1417 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1419 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1420 return 0;
1422 else if (fmt[i] == 'E')
1423 for (j = 0; j < XVECLEN (x, i); j++)
1424 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1425 return 0;
1428 return 1;
1431 /* Used for communication between mems_conflict_for_gcse_p and
1432 load_killed_in_block_p. Nonzero if mems_conflict_for_gcse_p finds a
1433 conflict between two memory references. */
1434 static int gcse_mems_conflict_p;
1436 /* Used for communication between mems_conflict_for_gcse_p and
1437    load_killed_in_block_p.  A memory reference from a load instruction;
1438    mems_conflict_for_gcse_p checks whether a memory store conflicts with
1439    this memory load.  */
1440 static rtx gcse_mem_operand;
1442 /* DEST is the output of an instruction. If it is a memory reference, and
1443 possibly conflicts with the load found in gcse_mem_operand, then set
1444 gcse_mems_conflict_p to a nonzero value. */
1446 static void
1447 mems_conflict_for_gcse_p (dest, setter, data)
1448 rtx dest, setter ATTRIBUTE_UNUSED;
1449 void *data ATTRIBUTE_UNUSED;
1451 while (GET_CODE (dest) == SUBREG
1452 || GET_CODE (dest) == ZERO_EXTRACT
1453 || GET_CODE (dest) == SIGN_EXTRACT
1454 || GET_CODE (dest) == STRICT_LOW_PART)
1455 dest = XEXP (dest, 0);
1457 /* If DEST is not a MEM, then it will not conflict with the load. Note
1458 that function calls are assumed to clobber memory, but are handled
1459 elsewhere. */
1460 if (GET_CODE (dest) != MEM)
1461 return;
1463   /* If we are setting a MEM in our list of specially recognized MEMs,
1464      don't mark it as killed this time.  */
1466 if (dest == gcse_mem_operand && pre_ldst_mems != NULL)
1468 if (!find_rtx_in_ldst (dest))
1469 gcse_mems_conflict_p = 1;
1470 return;
1473 if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
1474 rtx_addr_varies_p))
1475 gcse_mems_conflict_p = 1;
1478 /* Return nonzero if the expression in X (a memory reference) is killed
1479 in block BB before or after the insn with the CUID in UID_LIMIT.
1480 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
1481 before UID_LIMIT.
1483 To check the entire block, set UID_LIMIT to max_uid + 1 and
1484 AVAIL_P to 0. */
1486 static int
1487 load_killed_in_block_p (bb, uid_limit, x, avail_p)
1488 basic_block bb;
1489 int uid_limit;
1490 rtx x;
1491 int avail_p;
1493 rtx list_entry = modify_mem_list[bb->index];
1494 while (list_entry)
1496 rtx setter;
1497 /* Ignore entries in the list that do not apply. */
1498 if ((avail_p
1499 && INSN_CUID (XEXP (list_entry, 0)) < uid_limit)
1500 || (! avail_p
1501 && INSN_CUID (XEXP (list_entry, 0)) > uid_limit))
1503 list_entry = XEXP (list_entry, 1);
1504 continue;
1507 setter = XEXP (list_entry, 0);
1509 /* If SETTER is a call everything is clobbered. Note that calls
1510 to pure functions are never put on the list, so we need not
1511 worry about them. */
1512 if (GET_CODE (setter) == CALL_INSN)
1513 return 1;
1515 /* SETTER must be an INSN of some kind that sets memory. Call
1516 note_stores to examine each hunk of memory that is modified.
1518 The note_stores interface is pretty limited, so we have to
1519 communicate via global variables. Yuk. */
1520 gcse_mem_operand = x;
1521 gcse_mems_conflict_p = 0;
1522 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
1523 if (gcse_mems_conflict_p)
1524 return 1;
1525 list_entry = XEXP (list_entry, 1);
1527 return 0;
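/* Usage sketch: oprs_unchanged_p above calls this for a MEM operand X of
   insn I as load_killed_in_block_p (current_bb, INSN_CUID (I), x, avail_p).
   With AVAIL_P == 0 the question is "may X have been stored to earlier in
   this block?"; with AVAIL_P != 0 it is "may X be stored to later in this
   block?".  As noted above, UID_LIMIT == max_uid + 1 with AVAIL_P == 0
   checks the whole block.  */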
1530 /* Return nonzero if the operands of expression X are unchanged from
1531 the start of INSN's basic block up to but not including INSN. */
1533 static int
1534 oprs_anticipatable_p (x, insn)
1535 rtx x, insn;
1537 return oprs_unchanged_p (x, insn, 0);
1540 /* Return nonzero if the operands of expression X are unchanged from
1541 INSN to the end of INSN's basic block. */
1543 static int
1544 oprs_available_p (x, insn)
1545 rtx x, insn;
1547 return oprs_unchanged_p (x, insn, 1);
1550 /* Hash expression X.
1552 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1553 indicating if a volatile operand is found or if the expression contains
1554 something we don't want to insert in the table.
1556 ??? One might want to merge this with canon_hash. Later. */
1558 static unsigned int
1559 hash_expr (x, mode, do_not_record_p, hash_table_size)
1560 rtx x;
1561 enum machine_mode mode;
1562 int *do_not_record_p;
1563 int hash_table_size;
1565 unsigned int hash;
1567 *do_not_record_p = 0;
1569 hash = hash_expr_1 (x, mode, do_not_record_p);
1570 return hash % hash_table_size;
1573 /* Hash a string. Just add its bytes up. */
1575 static inline unsigned
1576 hash_string_1 (ps)
1577 const char *ps;
1579 unsigned hash = 0;
1580 const unsigned char *p = (const unsigned char *) ps;
1582 if (p)
1583 while (*p)
1584 hash += *p++;
1586 return hash;
1589 /* Subroutine of hash_expr to do the actual work. */
1591 static unsigned int
1592 hash_expr_1 (x, mode, do_not_record_p)
1593 rtx x;
1594 enum machine_mode mode;
1595 int *do_not_record_p;
1597 int i, j;
1598 unsigned hash = 0;
1599 enum rtx_code code;
1600 const char *fmt;
1602 /* Used to turn recursion into iteration. We can't rely on GCC's
1603 tail-recursion elimination since we need to keep accumulating values
1604 in HASH. */
1606 if (x == 0)
1607 return hash;
1609 repeat:
1610 code = GET_CODE (x);
1611 switch (code)
1613 case REG:
1614 hash += ((unsigned int) REG << 7) + REGNO (x);
1615 return hash;
1617 case CONST_INT:
1618 hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1619 + (unsigned int) INTVAL (x));
1620 return hash;
1622 case CONST_DOUBLE:
1623 /* This is like the general case, except that it only counts
1624 the integers representing the constant. */
1625 hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1626 if (GET_MODE (x) != VOIDmode)
1627 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1628 hash += (unsigned int) XWINT (x, i);
1629 else
1630 hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1631 + (unsigned int) CONST_DOUBLE_HIGH (x));
1632 return hash;
1634 case CONST_VECTOR:
1636 int units;
1637 rtx elt;
1639 units = CONST_VECTOR_NUNITS (x);
1641 for (i = 0; i < units; ++i)
1643 elt = CONST_VECTOR_ELT (x, i);
1644 hash += hash_expr_1 (elt, GET_MODE (elt), do_not_record_p);
1647 return hash;
1650 /* Assume there is only one rtx object for any given label. */
1651 case LABEL_REF:
1652 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1653 differences and differences between each stage's debugging dumps. */
1654 hash += (((unsigned int) LABEL_REF << 7)
1655 + CODE_LABEL_NUMBER (XEXP (x, 0)));
1656 return hash;
1658 case SYMBOL_REF:
1660 /* Don't hash on the symbol's address to avoid bootstrap differences.
1661 Different hash values may cause expressions to be recorded in
1662 different orders and thus different registers to be used in the
1663 final assembler. This also avoids differences in the dump files
1664 between various stages. */
1665 unsigned int h = 0;
1666 const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1668 while (*p)
1669 h += (h << 7) + *p++; /* ??? revisit */
1671 hash += ((unsigned int) SYMBOL_REF << 7) + h;
1672 return hash;
1675 case MEM:
1676 if (MEM_VOLATILE_P (x))
1678 *do_not_record_p = 1;
1679 return 0;
1682 hash += (unsigned int) MEM;
1683       /* We used to use the alias set for hashing, but this is not good, since
1684          the alias set may differ between -fprofile-arcs and -fbranch-probabilities
1685          compilations, causing the profiles to fail to match.  */
1686 x = XEXP (x, 0);
1687 goto repeat;
1689 case PRE_DEC:
1690 case PRE_INC:
1691 case POST_DEC:
1692 case POST_INC:
1693 case PC:
1694 case CC0:
1695 case CALL:
1696 case UNSPEC_VOLATILE:
1697 *do_not_record_p = 1;
1698 return 0;
1700 case ASM_OPERANDS:
1701 if (MEM_VOLATILE_P (x))
1703 *do_not_record_p = 1;
1704 return 0;
1706 else
1708 /* We don't want to take the filename and line into account. */
1709 hash += (unsigned) code + (unsigned) GET_MODE (x)
1710 + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
1711 + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
1712 + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);
1714 if (ASM_OPERANDS_INPUT_LENGTH (x))
1716 for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
1718 hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
1719 GET_MODE (ASM_OPERANDS_INPUT (x, i)),
1720 do_not_record_p)
1721 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
1722 (x, i)));
1725 hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
1726 x = ASM_OPERANDS_INPUT (x, 0);
1727 mode = GET_MODE (x);
1728 goto repeat;
1730 return hash;
1733 default:
1734 break;
1737 hash += (unsigned) code + (unsigned) GET_MODE (x);
1738 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1740 if (fmt[i] == 'e')
1742 /* If we are about to do the last recursive call
1743 needed at this level, change it into iteration.
1744 This function is called enough to be worth it. */
1745 if (i == 0)
1747 x = XEXP (x, i);
1748 goto repeat;
1751 hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1752 if (*do_not_record_p)
1753 return 0;
1756 else if (fmt[i] == 'E')
1757 for (j = 0; j < XVECLEN (x, i); j++)
1759 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1760 if (*do_not_record_p)
1761 return 0;
1764 else if (fmt[i] == 's')
1765 hash += hash_string_1 (XSTR (x, i));
1766 else if (fmt[i] == 'i')
1767 hash += (unsigned int) XINT (x, i);
1768 else
1769 abort ();
1772 return hash;
1775 /* Hash a set of register REGNO.
1777 Sets are hashed on the register that is set. This simplifies the PRE copy
1778 propagation code.
1780 ??? May need to make things more elaborate. Later, as necessary. */
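/* Worked example (illustrative): with a 31-bucket table, a set of (reg 117)
   hashes to bucket 117 % 31 == 24.  */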
1782 static unsigned int
1783 hash_set (regno, hash_table_size)
1784 int regno;
1785 int hash_table_size;
1787 unsigned int hash;
1789 hash = regno;
1790 return hash % hash_table_size;
1793 /* Return nonzero if X is equivalent to Y.
1794 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1796 static int
1797 expr_equiv_p (x, y)
1798 rtx x, y;
1800 int i, j;
1801 enum rtx_code code;
1802 const char *fmt;
1804 if (x == y)
1805 return 1;
1807 if (x == 0 || y == 0)
1808 return x == y;
1810 code = GET_CODE (x);
1811 if (code != GET_CODE (y))
1812 return 0;
1814 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1815 if (GET_MODE (x) != GET_MODE (y))
1816 return 0;
1818 switch (code)
1820 case PC:
1821 case CC0:
1822 return x == y;
1824 case CONST_INT:
1825 return INTVAL (x) == INTVAL (y);
1827 case LABEL_REF:
1828 return XEXP (x, 0) == XEXP (y, 0);
1830 case SYMBOL_REF:
1831 return XSTR (x, 0) == XSTR (y, 0);
1833 case REG:
1834 return REGNO (x) == REGNO (y);
1836 case MEM:
1837 /* Can't merge two expressions in different alias sets, since we can
1838 decide that the expression is transparent in a block when it isn't,
1839 due to it being set with a different alias set. */
1840 if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1841 return 0;
1842 break;
1844 /* For commutative operations, check both orders. */
1845 case PLUS:
1846 case MULT:
1847 case AND:
1848 case IOR:
1849 case XOR:
1850 case NE:
1851 case EQ:
1852 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1853 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1854 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1855 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1857 case ASM_OPERANDS:
1858 /* We don't use the generic code below because we want to
1859 disregard filename and line numbers. */
1861 /* A volatile asm isn't equivalent to any other. */
1862 if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
1863 return 0;
1865 if (GET_MODE (x) != GET_MODE (y)
1866 || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
1867 || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
1868 ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
1869 || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
1870 || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
1871 return 0;
1873 if (ASM_OPERANDS_INPUT_LENGTH (x))
1875 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
1876 if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
1877 ASM_OPERANDS_INPUT (y, i))
1878 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
1879 ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
1880 return 0;
1883 return 1;
1885 default:
1886 break;
1889 /* Compare the elements. If any pair of corresponding elements
1890 fail to match, return 0 for the whole thing. */
1892 fmt = GET_RTX_FORMAT (code);
1893 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1895 switch (fmt[i])
1897 case 'e':
1898 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1899 return 0;
1900 break;
1902 case 'E':
1903 if (XVECLEN (x, i) != XVECLEN (y, i))
1904 return 0;
1905 for (j = 0; j < XVECLEN (x, i); j++)
1906 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1907 return 0;
1908 break;
1910 case 's':
1911 if (strcmp (XSTR (x, i), XSTR (y, i)))
1912 return 0;
1913 break;
1915 case 'i':
1916 if (XINT (x, i) != XINT (y, i))
1917 return 0;
1918 break;
1920 case 'w':
1921 if (XWINT (x, i) != XWINT (y, i))
1922 return 0;
1923 break;
1925 case '0':
1926 break;
1928 default:
1929 abort ();
1933 return 1;
1936 /* Insert expression X in INSN in the hash TABLE.
1937 If it is already present, record it as the last occurrence in INSN's
1938 basic block.
1940 MODE is the mode of the value X is being stored into.
1941 It is only used if X is a CONST_INT.
1943 ANTIC_P is nonzero if X is an anticipatable expression.
1944 AVAIL_P is nonzero if X is an available expression. */
1946 static void
1947 insert_expr_in_table (x, mode, insn, antic_p, avail_p, table)
1948 rtx x;
1949 enum machine_mode mode;
1950 rtx insn;
1951 int antic_p, avail_p;
1952 struct hash_table *table;
1954 int found, do_not_record_p;
1955 unsigned int hash;
1956 struct expr *cur_expr, *last_expr = NULL;
1957 struct occr *antic_occr, *avail_occr;
1958 struct occr *last_occr = NULL;
1960 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1962 /* Do not insert the expression in the table if it contains volatile
1963 operands, or if hash_expr determines the expression is something we
1964 don't want to, or can't, handle. */
1965 if (do_not_record_p)
1966 return;
1968 cur_expr = table->table[hash];
1969 found = 0;
1971 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1973 /* If the expression isn't found, save a pointer to the end of
1974 the list. */
1975 last_expr = cur_expr;
1976 cur_expr = cur_expr->next_same_hash;
1979 if (! found)
1981 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1982 bytes_used += sizeof (struct expr);
1983 if (table->table[hash] == NULL)
1984 /* This is the first pattern that hashed to this index. */
1985 table->table[hash] = cur_expr;
1986 else
1987 /* Add EXPR to end of this hash chain. */
1988 last_expr->next_same_hash = cur_expr;
1990 /* Set the fields of the expr element. */
1991 cur_expr->expr = x;
1992 cur_expr->bitmap_index = table->n_elems++;
1993 cur_expr->next_same_hash = NULL;
1994 cur_expr->antic_occr = NULL;
1995 cur_expr->avail_occr = NULL;
1998 /* Now record the occurrence(s). */
1999 if (antic_p)
2001 antic_occr = cur_expr->antic_occr;
2003 /* Search for another occurrence in the same basic block. */
2004 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
2006 /* If an occurrence isn't found, save a pointer to the end of
2007 the list. */
2008 last_occr = antic_occr;
2009 antic_occr = antic_occr->next;
2012 if (antic_occr)
2013 /* Found another instance of the expression in the same basic block.
2014 Prefer the currently recorded one. We want the first one in the
2015 block and the block is scanned from start to end. */
2016 ; /* nothing to do */
2017 else
2019 /* First occurrence of this expression in this basic block. */
2020 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2021 bytes_used += sizeof (struct occr);
2022 /* First occurrence of this expression in any block? */
2023 if (cur_expr->antic_occr == NULL)
2024 cur_expr->antic_occr = antic_occr;
2025 else
2026 last_occr->next = antic_occr;
2028 antic_occr->insn = insn;
2029 antic_occr->next = NULL;
2033 if (avail_p)
2035 avail_occr = cur_expr->avail_occr;
2037 /* Search for another occurrence in the same basic block. */
2038 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
2040 /* If an occurrence isn't found, save a pointer to the end of
2041 the list. */
2042 last_occr = avail_occr;
2043 avail_occr = avail_occr->next;
2046 if (avail_occr)
2047 /* Found another instance of the expression in the same basic block.
2048 Prefer this occurrence to the currently recorded one. We want
2049 the last one in the block and the block is scanned from start
2050 to end. */
2051 avail_occr->insn = insn;
2052 else
2054 /* First occurrence of this expression in this basic block. */
2055 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2056 bytes_used += sizeof (struct occr);
2058 /* First occurrence of this expression in any block? */
2059 if (cur_expr->avail_occr == NULL)
2060 cur_expr->avail_occr = avail_occr;
2061 else
2062 last_occr->next = avail_occr;
2064 avail_occr->insn = insn;
2065 avail_occr->next = NULL;
2070 /* Insert pattern X in INSN in the hash table.
2071 X is a SET of a reg to either another reg or a constant.
2072 If it is already present, record it as the last occurrence in INSN's
2073 basic block. */
2075 static void
2076 insert_set_in_table (x, insn, table)
2077 rtx x;
2078 rtx insn;
2079 struct hash_table *table;
2081 int found;
2082 unsigned int hash;
2083 struct expr *cur_expr, *last_expr = NULL;
2084 struct occr *cur_occr, *last_occr = NULL;
2086 if (GET_CODE (x) != SET
2087 || GET_CODE (SET_DEST (x)) != REG)
2088 abort ();
2090 hash = hash_set (REGNO (SET_DEST (x)), table->size);
2092 cur_expr = table->table[hash];
2093 found = 0;
2095 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
2097 /* If the expression isn't found, save a pointer to the end of
2098 the list. */
2099 last_expr = cur_expr;
2100 cur_expr = cur_expr->next_same_hash;
2103 if (! found)
2105 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
2106 bytes_used += sizeof (struct expr);
2107 if (table->table[hash] == NULL)
2108 /* This is the first pattern that hashed to this index. */
2109 table->table[hash] = cur_expr;
2110 else
2111 /* Add EXPR to end of this hash chain. */
2112 last_expr->next_same_hash = cur_expr;
2114 /* Set the fields of the expr element.
2115 We must copy X because it can be modified when copy propagation is
2116 performed on its operands. */
2117 cur_expr->expr = copy_rtx (x);
2118 cur_expr->bitmap_index = table->n_elems++;
2119 cur_expr->next_same_hash = NULL;
2120 cur_expr->antic_occr = NULL;
2121 cur_expr->avail_occr = NULL;
2124 /* Now record the occurrence. */
2125 cur_occr = cur_expr->avail_occr;
2127 /* Search for another occurrence in the same basic block. */
2128 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
2130 /* If an occurrence isn't found, save a pointer to the end of
2131 the list. */
2132 last_occr = cur_occr;
2133 cur_occr = cur_occr->next;
2136 if (cur_occr)
2137 /* Found another instance of the expression in the same basic block.
2138 Prefer this occurrence to the currently recorded one. We want the
2139 last one in the block and the block is scanned from start to end. */
2140 cur_occr->insn = insn;
2141 else
2143 /* First occurrence of this expression in this basic block. */
2144 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2145 bytes_used += sizeof (struct occr);
2147 /* First occurrence of this expression in any block? */
2148 if (cur_expr->avail_occr == NULL)
2149 cur_expr->avail_occr = cur_occr;
2150 else
2151 last_occr->next = cur_occr;
2153 cur_occr->insn = insn;
2154 cur_occr->next = NULL;
2158 /* Scan pattern PAT of INSN and add an entry to the hash TABLE (set or
2159 expression one). */
2161 static void
2162 hash_scan_set (pat, insn, table)
2163 rtx pat, insn;
2164 struct hash_table *table;
2166 rtx src = SET_SRC (pat);
2167 rtx dest = SET_DEST (pat);
2168 rtx note;
2170 if (GET_CODE (src) == CALL)
2171 hash_scan_call (src, insn, table);
2173 else if (GET_CODE (dest) == REG)
2175 unsigned int regno = REGNO (dest);
2176 rtx tmp;
2178 /* If this is a single set and we are doing constant propagation,
2179 see if a REG_NOTE shows this to be equivalent to a constant. */
2180 if (table->set_p && (note = find_reg_equal_equiv_note (insn)) != 0
2181 && CONSTANT_P (XEXP (note, 0)))
2182 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
2184 /* Only record sets of pseudo-regs in the hash table. */
2185 if (! table->set_p
2186 && regno >= FIRST_PSEUDO_REGISTER
2187 /* Don't GCSE something if we can't do a reg/reg copy. */
2188 && can_copy_p [GET_MODE (dest)]
2189 /* GCSE commonly inserts instructions after the insn. We can't
2190 do that easily for EH_REGION notes, so disable GCSE on these
2191 for now. */
2192 && !find_reg_note (insn, REG_EH_REGION, NULL_RTX)
2193 /* Is SET_SRC something we want to gcse? */
2194 && want_to_gcse_p (src)
2195 /* Don't CSE a nop. */
2196 && ! set_noop_p (pat)
2197 /* Don't GCSE if it has an attached REG_EQUIV note.
2198 At this point only function parameters should have
2199 REG_EQUIV notes, and if the argument slot is used somewhere
2200 explicitly, it means the address of the parameter has been taken,
2201 so we should not extend the lifetime of the pseudo. */
2202 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
2203 || GET_CODE (XEXP (note, 0)) != MEM))
2205 /* An expression is not anticipatable if its operands are
2206 modified before this insn or if this is not the only SET in
2207 this insn. */
2208 int antic_p = oprs_anticipatable_p (src, insn) && single_set (insn);
2209 /* An expression is not available if its operands are
2210 subsequently modified, including this insn. It's also not
2211 available if this is a branch, because we can't insert
2212 a set after the branch. */
2213 int avail_p = (oprs_available_p (src, insn)
2214 && ! JUMP_P (insn));
2216 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table);
2219 /* Record sets for constant/copy propagation. */
2220 else if (table->set_p
2221 && regno >= FIRST_PSEUDO_REGISTER
2222 && ((GET_CODE (src) == REG
2223 && REGNO (src) >= FIRST_PSEUDO_REGISTER
2224 && can_copy_p [GET_MODE (dest)]
2225 && REGNO (src) != regno)
2226 || (CONSTANT_P (src)
2227 && GET_CODE (src) != CONSTANT_P_RTX))
2228 /* A copy is not available if its src or dest is subsequently
2229 modified. Here we want to search from INSN+1 on, but
2230 oprs_available_p searches from INSN on. */
2231 && (insn == BLOCK_END (BLOCK_NUM (insn))
2232 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
2233 && oprs_available_p (pat, tmp))))
2234 insert_set_in_table (pat, insn, table);
2238 static void
2239 hash_scan_clobber (x, insn, table)
2240 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2241 struct hash_table *table ATTRIBUTE_UNUSED;
2243 /* Currently nothing to do. */
2246 static void
2247 hash_scan_call (x, insn, table)
2248 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2249 struct hash_table *table ATTRIBUTE_UNUSED;
2251 /* Currently nothing to do. */
2254 /* Process INSN and add hash table entries as appropriate.
2256 Only available expressions that set a single pseudo-reg are recorded.
2258 Single sets in a PARALLEL could be handled, but it's an extra complication
2259 that isn't dealt with right now. The trick is handling the CLOBBERs that
2260 are also in the PARALLEL. Later.
2262 If SET_P is nonzero, this is for the assignment hash table,
2263 otherwise it is for the expression hash table.
2264 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should
2265 not record any expressions. */
2267 static void
2268 hash_scan_insn (insn, table, in_libcall_block)
2269 rtx insn;
2270 struct hash_table *table;
2271 int in_libcall_block;
2273 rtx pat = PATTERN (insn);
2274 int i;
2276 if (in_libcall_block)
2277 return;
2279 /* Pick out the sets of INSN and for other forms of instructions record
2280 what's been modified. */
2282 if (GET_CODE (pat) == SET)
2283 hash_scan_set (pat, insn, table);
2284 else if (GET_CODE (pat) == PARALLEL)
2285 for (i = 0; i < XVECLEN (pat, 0); i++)
2287 rtx x = XVECEXP (pat, 0, i);
2289 if (GET_CODE (x) == SET)
2290 hash_scan_set (x, insn, table);
2291 else if (GET_CODE (x) == CLOBBER)
2292 hash_scan_clobber (x, insn, table);
2293 else if (GET_CODE (x) == CALL)
2294 hash_scan_call (x, insn, table);
2297 else if (GET_CODE (pat) == CLOBBER)
2298 hash_scan_clobber (pat, insn, table);
2299 else if (GET_CODE (pat) == CALL)
2300 hash_scan_call (pat, insn, table);
2303 static void
2304 dump_hash_table (file, name, table)
2305 FILE *file;
2306 const char *name;
2307 struct hash_table *table;
2309 int i;
2310 /* Flattened out table, so it's printed in proper order. */
2311 struct expr **flat_table;
2312 unsigned int *hash_val;
2313 struct expr *expr;
2315 flat_table
2316 = (struct expr **) xcalloc (table->n_elems, sizeof (struct expr *));
2317 hash_val = (unsigned int *) xmalloc (table->n_elems * sizeof (unsigned int));
2319 for (i = 0; i < (int) table->size; i++)
2320 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
2322 flat_table[expr->bitmap_index] = expr;
2323 hash_val[expr->bitmap_index] = i;
2326 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2327 name, table->size, table->n_elems);
2329 for (i = 0; i < (int) table->n_elems; i++)
2330 if (flat_table[i] != 0)
2332 expr = flat_table[i];
2333 fprintf (file, "Index %d (hash value %d)\n ",
2334 expr->bitmap_index, hash_val[i]);
2335 print_rtl (file, expr->expr);
2336 fprintf (file, "\n");
2339 fprintf (file, "\n");
2341 free (flat_table);
2342 free (hash_val);
2345 /* Record register first/last/block set information for REGNO in INSN.
2347 first_set records the first place in the block where the register
2348 is set and is used to compute "anticipatability".
2350 last_set records the last place in the block where the register
2351 is set and is used to compute "availability".
2353 last_bb records the block for which first_set and last_set are
2354 valid, as a quick test to invalidate them.
2356 reg_set_in_block records whether the register is set in the block
2357 and is used to compute "transparency". */
2359 static void
2360 record_last_reg_set_info (insn, regno)
2361 rtx insn;
2362 int regno;
2364 struct reg_avail_info *info = &reg_avail_info[regno];
2365 int cuid = INSN_CUID (insn);
2367 info->last_set = cuid;
2368 if (info->last_bb != current_bb)
2370 info->last_bb = current_bb;
2371 info->first_set = cuid;
2372 SET_BIT (reg_set_in_block[current_bb->index], regno);
2377 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
2378 Note we store a pair of elements in the list, so they have to be
2379 taken off pairwise. */
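/* Concretely (summarizing the code below): each store pushes first its
   canonicalized address and then the canonicalized MEM itself, so from the
   head the list reads (dest, dest_addr, dest, dest_addr, ...); compute_transp
   consumes the entries two at a time in that order.  */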
2381 static void
2382 canon_list_insert (dest, unused1, v_insn)
2383 rtx dest ATTRIBUTE_UNUSED;
2384 rtx unused1 ATTRIBUTE_UNUSED;
2385 void * v_insn;
2387 rtx dest_addr, insn;
2388 int bb;
2390 while (GET_CODE (dest) == SUBREG
2391 || GET_CODE (dest) == ZERO_EXTRACT
2392 || GET_CODE (dest) == SIGN_EXTRACT
2393 || GET_CODE (dest) == STRICT_LOW_PART)
2394 dest = XEXP (dest, 0);
2396 /* If DEST is not a MEM, then it will not conflict with a load. Note
2397 that function calls are assumed to clobber memory, but are handled
2398 elsewhere. */
2400 if (GET_CODE (dest) != MEM)
2401 return;
2403 dest_addr = get_addr (XEXP (dest, 0));
2404 dest_addr = canon_rtx (dest_addr);
2405 insn = (rtx) v_insn;
2406 bb = BLOCK_NUM (insn);
2408 canon_modify_mem_list[bb] =
2409 alloc_EXPR_LIST (VOIDmode, dest_addr, canon_modify_mem_list[bb]);
2410 canon_modify_mem_list[bb] =
2411 alloc_EXPR_LIST (VOIDmode, dest, canon_modify_mem_list[bb]);
2412 bitmap_set_bit (canon_modify_mem_list_set, bb);
2415 /* Record memory modification information for INSN. We do not actually care
2416 about the memory location(s) that are set, or even how they are set (consider
2417 a CALL_INSN). We merely need to record which insns modify memory. */
2419 static void
2420 record_last_mem_set_info (insn)
2421 rtx insn;
2423 int bb = BLOCK_NUM (insn);
2425 /* load_killed_in_block_p will handle the case of calls clobbering
2426 everything. */
2427 modify_mem_list[bb] = alloc_INSN_LIST (insn, modify_mem_list[bb]);
2428 bitmap_set_bit (modify_mem_list_set, bb);
2430 if (GET_CODE (insn) == CALL_INSN)
2432 /* Note that traversals of this list (other than for freeing)
2433 will break after encountering a CALL_INSN. So, there's no
2434 need to insert a pair of items, as canon_list_insert does. */
2435 canon_modify_mem_list[bb] =
2436 alloc_INSN_LIST (insn, canon_modify_mem_list[bb]);
2437 bitmap_set_bit (canon_modify_mem_list_set, bb);
2439 else
2440 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
2443 /* Called from compute_hash_table via note_stores to handle one
2444 SET or CLOBBER in an insn. DATA is really the instruction in which
2445 the SET is taking place. */
2447 static void
2448 record_last_set_info (dest, setter, data)
2449 rtx dest, setter ATTRIBUTE_UNUSED;
2450 void *data;
2452 rtx last_set_insn = (rtx) data;
2454 if (GET_CODE (dest) == SUBREG)
2455 dest = SUBREG_REG (dest);
2457 if (GET_CODE (dest) == REG)
2458 record_last_reg_set_info (last_set_insn, REGNO (dest));
2459 else if (GET_CODE (dest) == MEM
2460 /* Ignore pushes, they clobber nothing. */
2461 && ! push_operand (dest, GET_MODE (dest)))
2462 record_last_mem_set_info (last_set_insn);
2465 /* Top level function to create an expression or assignment hash table.
2467 Expression entries are placed in the hash table if
2468 - they are of the form (set (pseudo-reg) src),
2469 - src is something we want to perform GCSE on,
2470 - none of the operands are subsequently modified in the block
2472 Assignment entries are placed in the hash table if
2473 - they are of the form (set (pseudo-reg) src),
2474 - src is something we want to perform const/copy propagation on,
2475 - none of the operands or target are subsequently modified in the block
2477 Currently src must be a pseudo-reg or a const_int.
2479 TABLE is the table computed. */
2481 static void
2482 compute_hash_table_work (table)
2483 struct hash_table *table;
2485 unsigned int i;
2487 /* While we compute the hash table we also compute a bit array of which
2488 registers are set in which blocks.
2489 ??? This isn't needed during const/copy propagation, but it's cheap to
2490 compute. Later. */
2491 sbitmap_vector_zero (reg_set_in_block, last_basic_block);
2493 /* Re-cache (return to the free list) any INSN_LIST nodes we have allocated. */
2494 clear_modify_mem_tables ();
2495 /* Some working arrays used to track first and last set in each block. */
2496 reg_avail_info = (struct reg_avail_info*)
2497 gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));
2499 for (i = 0; i < max_gcse_regno; ++i)
2500 reg_avail_info[i].last_bb = NULL;
2502 FOR_EACH_BB (current_bb)
2504 rtx insn;
2505 unsigned int regno;
2506 int in_libcall_block;
2508 /* First pass over the instructions records information used to
2509 determine when registers and memory are first and last set.
2510 ??? hard-reg reg_set_in_block computation
2511 could be moved to compute_sets since they currently don't change. */
2513 for (insn = current_bb->head;
2514 insn && insn != NEXT_INSN (current_bb->end);
2515 insn = NEXT_INSN (insn))
2517 if (! INSN_P (insn))
2518 continue;
2520 if (GET_CODE (insn) == CALL_INSN)
2522 bool clobbers_all = false;
2523 #ifdef NON_SAVING_SETJMP
2524 if (NON_SAVING_SETJMP
2525 && find_reg_note (insn, REG_SETJMP, NULL_RTX))
2526 clobbers_all = true;
2527 #endif
2529 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2530 if (clobbers_all
2531 || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
2532 record_last_reg_set_info (insn, regno);
2534 mark_call (insn);
2537 note_stores (PATTERN (insn), record_last_set_info, insn);
2540 /* Insert implicit sets in the hash table. */
2541 if (table->set_p
2542 && implicit_sets[current_bb->index] != NULL_RTX)
2543 hash_scan_set (implicit_sets[current_bb->index],
2544 current_bb->head, table);
2546 /* The next pass builds the hash table. */
2548 for (insn = current_bb->head, in_libcall_block = 0;
2549 insn && insn != NEXT_INSN (current_bb->end);
2550 insn = NEXT_INSN (insn))
2551 if (INSN_P (insn))
2553 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2554 in_libcall_block = 1;
2555 else if (table->set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
2556 in_libcall_block = 0;
2557 hash_scan_insn (insn, table, in_libcall_block);
2558 if (!table->set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
2559 in_libcall_block = 0;
2563 free (reg_avail_info);
2564 reg_avail_info = NULL;
2567 /* Allocate space for the set/expr hash TABLE.
2568 N_INSNS is the number of instructions in the function.
2569 It is used to determine the number of buckets to use.
2570 SET_P determines whether a set or an expression table will
2571 be created. */
2573 static void
2574 alloc_hash_table (n_insns, table, set_p)
2575 int n_insns;
2576 struct hash_table *table;
2577 int set_p;
2579 int n;
2581 table->size = n_insns / 4;
2582 if (table->size < 11)
2583 table->size = 11;
2585 /* Attempt to maintain efficient use of the hash table.
2586 Making it an odd number is simplest for now.
2587 ??? Later take some measurements. */
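/* Sizing example (illustrative): a function of 1000 insns gets 1000/4 = 250
   buckets, bumped to the odd value 251 below; anything under 44 insns gets
   the 11-bucket minimum.  */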
2588 table->size |= 1;
2589 n = table->size * sizeof (struct expr *);
2590 table->table = (struct expr **) gmalloc (n);
2591 table->set_p = set_p;
2594 /* Free things allocated by alloc_hash_table. */
2596 static void
2597 free_hash_table (table)
2598 struct hash_table *table;
2600 free (table->table);
2603 /* Compute the hash TABLE for doing copy/const propagation or
2604 expression hash table. */
2606 static void
2607 compute_hash_table (table)
2608 struct hash_table *table;
2610 /* Initialize count of number of entries in hash table. */
2611 table->n_elems = 0;
2612 memset ((char *) table->table, 0,
2613 table->size * sizeof (struct expr *));
2615 compute_hash_table_work (table);
2618 /* Expression tracking support. */
2620 /* Lookup pattern PAT in the expression TABLE.
2621 The result is a pointer to the table entry, or NULL if not found. */
2623 static struct expr *
2624 lookup_expr (pat, table)
2625 rtx pat;
2626 struct hash_table *table;
2628 int do_not_record_p;
2629 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2630 table->size);
2631 struct expr *expr;
2633 if (do_not_record_p)
2634 return NULL;
2636 expr = table->table[hash];
2638 while (expr && ! expr_equiv_p (expr->expr, pat))
2639 expr = expr->next_same_hash;
2641 return expr;
2644 /* Lookup REGNO in the set TABLE. If PAT is non-NULL look for the entry that
2645 matches it, otherwise return the first entry for REGNO. The result is a
2646 pointer to the table entry, or NULL if not found. */
2648 static struct expr *
2649 lookup_set (regno, pat, table)
2650 unsigned int regno;
2651 rtx pat;
2652 struct hash_table *table;
2654 unsigned int hash = hash_set (regno, table->size);
2655 struct expr *expr;
2657 expr = table->table[hash];
2659 if (pat)
2661 while (expr && ! expr_equiv_p (expr->expr, pat))
2662 expr = expr->next_same_hash;
2664 else
2666 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2667 expr = expr->next_same_hash;
2670 return expr;
2673 /* Return the next entry for REGNO in list EXPR. */
2675 static struct expr *
2676 next_set (regno, expr)
2677 unsigned int regno;
2678 struct expr *expr;
2681 expr = expr->next_same_hash;
2682 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2684 return expr;
2687 /* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
2688 types may be mixed. */
2690 static void
2691 free_insn_expr_list_list (listp)
2692 rtx *listp;
2694 rtx list, next;
2696 for (list = *listp; list ; list = next)
2698 next = XEXP (list, 1);
2699 if (GET_CODE (list) == EXPR_LIST)
2700 free_EXPR_LIST_node (list);
2701 else
2702 free_INSN_LIST_node (list);
2705 *listp = NULL;
2708 /* Clear canon_modify_mem_list and modify_mem_list tables. */
2709 static void
2710 clear_modify_mem_tables ()
2712 int i;
2714 EXECUTE_IF_SET_IN_BITMAP
2715 (modify_mem_list_set, 0, i, free_INSN_LIST_list (modify_mem_list + i));
2716 bitmap_clear (modify_mem_list_set);
2718 EXECUTE_IF_SET_IN_BITMAP
2719 (canon_modify_mem_list_set, 0, i,
2720 free_insn_expr_list_list (canon_modify_mem_list + i));
2721 bitmap_clear (canon_modify_mem_list_set);
2724 /* Release memory used by modify_mem_list and canon_modify_mem_list. */
2726 static void
2727 free_modify_mem_tables ()
2729 clear_modify_mem_tables ();
2730 free (modify_mem_list);
2731 free (canon_modify_mem_list);
2732 modify_mem_list = 0;
2733 canon_modify_mem_list = 0;
2736 /* Reset tables used to keep track of what's still available [since the
2737 start of the block]. */
2739 static void
2740 reset_opr_set_tables ()
2742 /* Maintain a bitmap of which regs have been set since beginning of
2743 the block. */
2744 CLEAR_REG_SET (reg_set_bitmap);
2746 /* Also keep a record of the last instruction to modify memory.
2747 For now this is very trivial; we only record whether any memory
2748 location has been modified. */
2749 clear_modify_mem_tables ();
2752 /* Return nonzero if the operands of X are not set before INSN in
2753 INSN's basic block. */
2755 static int
2756 oprs_not_set_p (x, insn)
2757 rtx x, insn;
2759 int i, j;
2760 enum rtx_code code;
2761 const char *fmt;
2763 if (x == 0)
2764 return 1;
2766 code = GET_CODE (x);
2767 switch (code)
2769 case PC:
2770 case CC0:
2771 case CONST:
2772 case CONST_INT:
2773 case CONST_DOUBLE:
2774 case CONST_VECTOR:
2775 case SYMBOL_REF:
2776 case LABEL_REF:
2777 case ADDR_VEC:
2778 case ADDR_DIFF_VEC:
2779 return 1;
2781 case MEM:
2782 if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
2783 INSN_CUID (insn), x, 0))
2784 return 0;
2785 else
2786 return oprs_not_set_p (XEXP (x, 0), insn);
2788 case REG:
2789 return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x));
2791 default:
2792 break;
2795 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2797 if (fmt[i] == 'e')
2799 /* If we are about to do the last recursive call
2800 needed at this level, change it into iteration.
2801 This function is called enough to be worth it. */
2802 if (i == 0)
2803 return oprs_not_set_p (XEXP (x, i), insn);
2805 if (! oprs_not_set_p (XEXP (x, i), insn))
2806 return 0;
2808 else if (fmt[i] == 'E')
2809 for (j = 0; j < XVECLEN (x, i); j++)
2810 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2811 return 0;
2814 return 1;
2817 /* Mark things set by a CALL. */
2819 static void
2820 mark_call (insn)
2821 rtx insn;
2823 if (! CONST_OR_PURE_CALL_P (insn))
2824 record_last_mem_set_info (insn);
2827 /* Mark things set by a SET. */
2829 static void
2830 mark_set (pat, insn)
2831 rtx pat, insn;
2833 rtx dest = SET_DEST (pat);
2835 while (GET_CODE (dest) == SUBREG
2836 || GET_CODE (dest) == ZERO_EXTRACT
2837 || GET_CODE (dest) == SIGN_EXTRACT
2838 || GET_CODE (dest) == STRICT_LOW_PART)
2839 dest = XEXP (dest, 0);
2841 if (GET_CODE (dest) == REG)
2842 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest));
2843 else if (GET_CODE (dest) == MEM)
2844 record_last_mem_set_info (insn);
2846 if (GET_CODE (SET_SRC (pat)) == CALL)
2847 mark_call (insn);
2850 /* Record things set by a CLOBBER. */
2852 static void
2853 mark_clobber (pat, insn)
2854 rtx pat, insn;
2856 rtx clob = XEXP (pat, 0);
2858 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2859 clob = XEXP (clob, 0);
2861 if (GET_CODE (clob) == REG)
2862 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob));
2863 else
2864 record_last_mem_set_info (insn);
2867 /* Record things set by INSN.
2868 This data is used by oprs_not_set_p. */
2870 static void
2871 mark_oprs_set (insn)
2872 rtx insn;
2874 rtx pat = PATTERN (insn);
2875 int i;
2877 if (GET_CODE (pat) == SET)
2878 mark_set (pat, insn);
2879 else if (GET_CODE (pat) == PARALLEL)
2880 for (i = 0; i < XVECLEN (pat, 0); i++)
2882 rtx x = XVECEXP (pat, 0, i);
2884 if (GET_CODE (x) == SET)
2885 mark_set (x, insn);
2886 else if (GET_CODE (x) == CLOBBER)
2887 mark_clobber (x, insn);
2888 else if (GET_CODE (x) == CALL)
2889 mark_call (insn);
2892 else if (GET_CODE (pat) == CLOBBER)
2893 mark_clobber (pat, insn);
2894 else if (GET_CODE (pat) == CALL)
2895 mark_call (insn);
2899 /* Classic GCSE reaching definition support. */
2901 /* Allocate reaching def variables. */
2903 static void
2904 alloc_rd_mem (n_blocks, n_insns)
2905 int n_blocks, n_insns;
2907 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2908 sbitmap_vector_zero (rd_kill, n_blocks);
2910 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2911 sbitmap_vector_zero (rd_gen, n_blocks);
2913 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2914 sbitmap_vector_zero (reaching_defs, n_blocks);
2916 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2917 sbitmap_vector_zero (rd_out, n_blocks);
2920 /* Free reaching def variables. */
2922 static void
2923 free_rd_mem ()
2925 sbitmap_vector_free (rd_kill);
2926 sbitmap_vector_free (rd_gen);
2927 sbitmap_vector_free (reaching_defs);
2928 sbitmap_vector_free (rd_out);
2931 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2933 static void
2934 handle_rd_kill_set (insn, regno, bb)
2935 rtx insn;
2936 int regno;
2937 basic_block bb;
2939 struct reg_set *this_reg;
2941 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2942 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2943 SET_BIT (rd_kill[bb->index], INSN_CUID (this_reg->insn));
2946 /* Compute the set of kills for reaching definitions. */
2948 static void
2949 compute_kill_rd ()
2951 int cuid;
2952 unsigned int regno;
2953 int i;
2954 basic_block bb;
2956 /* For each block
2957 For each set bit in `gen' of the block (i.e. each insn which
2958 generates a definition in the block)
2959 Call the reg set by the insn corresponding to that bit regx
2960 Look at the linked list starting at reg_set_table[regx]
2961 For each setting of regx in the linked list, which is not in
2962 this block
2963 Set the bit in `kill' corresponding to that insn. */
2964 FOR_EACH_BB (bb)
2965 for (cuid = 0; cuid < max_cuid; cuid++)
2966 if (TEST_BIT (rd_gen[bb->index], cuid))
2968 rtx insn = CUID_INSN (cuid);
2969 rtx pat = PATTERN (insn);
2971 if (GET_CODE (insn) == CALL_INSN)
2973 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2974 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
2975 handle_rd_kill_set (insn, regno, bb);
2978 if (GET_CODE (pat) == PARALLEL)
2980 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2982 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2984 if ((code == SET || code == CLOBBER)
2985 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2986 handle_rd_kill_set (insn,
2987 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2988 bb);
2991 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2992 /* Each setting of this register outside of this block
2993 must be marked in the set of kills in this block. */
2994 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2998 /* Compute the reaching definitions as in
2999 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
3000 Chapter 10. It is the same algorithm as used for computing available
3001 expressions but applied to the gens and kills of reaching definitions. */
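/* Sketch of the fixed point computed below, in terms of the bitmaps from
   alloc_rd_mem (a paraphrase added here, not from the original source):

     rd_out[bb]        is seeded with rd_gen[bb];
     reaching_defs[bb] = union over predecessors P of rd_out[P];
     rd_out[bb]        = rd_gen[bb] | (reaching_defs[bb] & ~rd_kill[bb]);

   iterated over all blocks until no rd_out changes.  */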
3003 static void
3004 compute_rd ()
3006 int changed, passes;
3007 basic_block bb;
3009 FOR_EACH_BB (bb)
3010 sbitmap_copy (rd_out[bb->index] /*dst*/, rd_gen[bb->index] /*src*/);
3012 passes = 0;
3013 changed = 1;
3014 while (changed)
3016 changed = 0;
3017 FOR_EACH_BB (bb)
3019 sbitmap_union_of_preds (reaching_defs[bb->index], rd_out, bb->index);
3020 changed |= sbitmap_union_of_diff_cg (rd_out[bb->index], rd_gen[bb->index],
3021 reaching_defs[bb->index], rd_kill[bb->index]);
3023 passes++;
3026 if (gcse_file)
3027 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
3030 /* Classic GCSE available expression support. */
3032 /* Allocate memory for available expression computation. */
3034 static void
3035 alloc_avail_expr_mem (n_blocks, n_exprs)
3036 int n_blocks, n_exprs;
3038 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3039 sbitmap_vector_zero (ae_kill, n_blocks);
3041 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3042 sbitmap_vector_zero (ae_gen, n_blocks);
3044 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3045 sbitmap_vector_zero (ae_in, n_blocks);
3047 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3048 sbitmap_vector_zero (ae_out, n_blocks);
3051 static void
3052 free_avail_expr_mem ()
3054 sbitmap_vector_free (ae_kill);
3055 sbitmap_vector_free (ae_gen);
3056 sbitmap_vector_free (ae_in);
3057 sbitmap_vector_free (ae_out);
3060 /* Compute the set of available expressions generated in each basic block. */
3062 static void
3063 compute_ae_gen (expr_hash_table)
3064 struct hash_table *expr_hash_table;
3066 unsigned int i;
3067 struct expr *expr;
3068 struct occr *occr;
3070 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
3071 This is all we have to do because an expression is not recorded if it
3072 is not available, and the only expressions we want to work with are the
3073 ones that are recorded. */
3074 for (i = 0; i < expr_hash_table->size; i++)
3075 for (expr = expr_hash_table->table[i]; expr != 0; expr = expr->next_same_hash)
3076 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
3077 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
3080 /* Return nonzero if expression X is killed in BB. */
3082 static int
3083 expr_killed_p (x, bb)
3084 rtx x;
3085 basic_block bb;
3087 int i, j;
3088 enum rtx_code code;
3089 const char *fmt;
3091 if (x == 0)
3092 return 1;
3094 code = GET_CODE (x);
3095 switch (code)
3097 case REG:
3098 return TEST_BIT (reg_set_in_block[bb->index], REGNO (x));
3100 case MEM:
3101 if (load_killed_in_block_p (bb, get_max_uid () + 1, x, 0))
3102 return 1;
3103 else
3104 return expr_killed_p (XEXP (x, 0), bb);
3106 case PC:
3107 case CC0: /*FIXME*/
3108 case CONST:
3109 case CONST_INT:
3110 case CONST_DOUBLE:
3111 case CONST_VECTOR:
3112 case SYMBOL_REF:
3113 case LABEL_REF:
3114 case ADDR_VEC:
3115 case ADDR_DIFF_VEC:
3116 return 0;
3118 default:
3119 break;
3122 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3124 if (fmt[i] == 'e')
3126 /* If we are about to do the last recursive call
3127 needed at this level, change it into iteration.
3128 This function is called enough to be worth it. */
3129 if (i == 0)
3130 return expr_killed_p (XEXP (x, i), bb);
3131 else if (expr_killed_p (XEXP (x, i), bb))
3132 return 1;
3134 else if (fmt[i] == 'E')
3135 for (j = 0; j < XVECLEN (x, i); j++)
3136 if (expr_killed_p (XVECEXP (x, i, j), bb))
3137 return 1;
3140 return 0;
3143 /* Compute the set of available expressions killed in each basic block. */
3145 static void
3146 compute_ae_kill (ae_gen, ae_kill, expr_hash_table)
3147 sbitmap *ae_gen, *ae_kill;
3148 struct hash_table *expr_hash_table;
3150 basic_block bb;
3151 unsigned int i;
3152 struct expr *expr;
3154 FOR_EACH_BB (bb)
3155 for (i = 0; i < expr_hash_table->size; i++)
3156 for (expr = expr_hash_table->table[i]; expr; expr = expr->next_same_hash)
3158 /* Skip EXPR if generated in this block. */
3159 if (TEST_BIT (ae_gen[bb->index], expr->bitmap_index))
3160 continue;
3162 if (expr_killed_p (expr->expr, bb))
3163 SET_BIT (ae_kill[bb->index], expr->bitmap_index);
3167 /* Actually perform the Classic GCSE optimizations. */
3169 /* Return nonzero if occurrence OCCR of expression EXPR reaches block BB.
3171 CHECK_SELF_LOOP is nonzero if we should consider a block reaching itself
3172 as a positive reach. We want to do this when there are two computations
3173 of the expression in the block.
3175 VISITED is a pointer to a working buffer for tracking which BB's have
3176 been visited. It is NULL for the top-level call.
3178 We treat reaching expressions that go through blocks containing the same
3179 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
3180 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3181 2 as not reaching. The intent is to improve the probability of finding
3182 only one reaching expression and to reduce register lifetimes by picking
3183 the closest such expression. */
3185 static int
3186 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
3187 struct occr *occr;
3188 struct expr *expr;
3189 basic_block bb;
3190 int check_self_loop;
3191 char *visited;
3193 edge pred;
3195 for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
3197 basic_block pred_bb = pred->src;
3199 if (visited[pred_bb->index])
3200 /* This predecessor has already been visited. Nothing to do. */
3202 else if (pred_bb == bb)
3204 /* BB loops on itself. */
3205 if (check_self_loop
3206 && TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index)
3207 && BLOCK_NUM (occr->insn) == pred_bb->index)
3208 return 1;
3210 visited[pred_bb->index] = 1;
3213 /* Ignore this predecessor if it kills the expression. */
3214 else if (TEST_BIT (ae_kill[pred_bb->index], expr->bitmap_index))
3215 visited[pred_bb->index] = 1;
3217 /* Does this predecessor generate this expression? */
3218 else if (TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index))
3220 /* Is this the occurrence we're looking for?
3221 Note that there's only one generating occurrence per block
3222 so we just need to check the block number. */
3223 if (BLOCK_NUM (occr->insn) == pred_bb->index)
3224 return 1;
3226 visited[pred_bb->index] = 1;
3229 /* Neither gen nor kill. */
3230 else
3232 visited[pred_bb->index] = 1;
3233 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
3234 visited))
3236 return 1;
3240 /* All paths have been checked. */
3241 return 0;
3244 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3245 memory allocated for that function is returned. */
3247 static int
3248 expr_reaches_here_p (occr, expr, bb, check_self_loop)
3249 struct occr *occr;
3250 struct expr *expr;
3251 basic_block bb;
3252 int check_self_loop;
3254 int rval;
3255 char *visited = (char *) xcalloc (last_basic_block, 1);
3257 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
3259 free (visited);
3260 return rval;
3263 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3264 If there is more than one such instruction, return NULL.
3266 Called only by handle_avail_expr. */
3268 static rtx
3269 computing_insn (expr, insn)
3270 struct expr *expr;
3271 rtx insn;
3273 basic_block bb = BLOCK_FOR_INSN (insn);
3275 if (expr->avail_occr->next == NULL)
3277 if (BLOCK_FOR_INSN (expr->avail_occr->insn) == bb)
3278 /* The available expression is actually itself
3279 (i.e. a loop in the flow graph) so do nothing. */
3280 return NULL;
3282 /* (FIXME) This is the case where we found a pattern that was created
3283 by a substitution that took place. */
3284 return expr->avail_occr->insn;
3286 else
3288 /* Pattern is computed more than once.
3289 Search backwards from this insn to see how many of these
3290 computations actually reach this insn. */
3291 struct occr *occr;
3292 rtx insn_computes_expr = NULL;
3293 int can_reach = 0;
3295 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3297 if (BLOCK_FOR_INSN (occr->insn) == bb)
3299 /* The expression is generated in this block.
3300 The only time we care about this is when the expression
3301 is generated later in the block [and thus there's a loop].
3302 We let the normal cse pass handle the other cases. */
3303 if (INSN_CUID (insn) < INSN_CUID (occr->insn)
3304 && expr_reaches_here_p (occr, expr, bb, 1))
3306 can_reach++;
3307 if (can_reach > 1)
3308 return NULL;
3310 insn_computes_expr = occr->insn;
3313 else if (expr_reaches_here_p (occr, expr, bb, 0))
3315 can_reach++;
3316 if (can_reach > 1)
3317 return NULL;
3319 insn_computes_expr = occr->insn;
3323 if (insn_computes_expr == NULL)
3324 abort ();
3326 return insn_computes_expr;
3330 /* Return nonzero if the definition in DEF_INSN can reach INSN.
3331 Only called by can_disregard_other_sets. */
3333 static int
3334 def_reaches_here_p (insn, def_insn)
3335 rtx insn, def_insn;
3337 rtx reg;
3339 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3340 return 1;
3342 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3344 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3346 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3347 return 1;
3348 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3349 reg = XEXP (PATTERN (def_insn), 0);
3350 else if (GET_CODE (PATTERN (def_insn)) == SET)
3351 reg = SET_DEST (PATTERN (def_insn));
3352 else
3353 abort ();
3355 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3357 else
3358 return 0;
3361 return 0;
3364 /* Return nonzero if *ADDR_THIS_REG can only have one value at INSN. The
3365 value returned is the number of definitions that reach INSN. Returning a
3366 value of zero means that [maybe] more than one definition reaches INSN and
3367 the caller can't perform whatever optimization it is trying; i.e. it is
3368 always safe to return zero. */
3370 static int
3371 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3372 struct reg_set **addr_this_reg;
3373 rtx insn;
3374 int for_combine;
3376 int number_of_reaching_defs = 0;
3377 struct reg_set *this_reg;
3379 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3380 if (def_reaches_here_p (insn, this_reg->insn))
3382 number_of_reaching_defs++;
3383 /* Ignore parallels for now. */
3384 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3385 return 0;
3387 if (!for_combine
3388 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3389 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3390 SET_SRC (PATTERN (insn)))))
3391 /* A setting of the reg to a different value reaches INSN. */
3392 return 0;
3394 if (number_of_reaching_defs > 1)
3396 /* If, in this setting, the value the register is being set to is
3397 equal to the previous value the register was set to, and this
3398 setting reaches the insn we are trying to do the substitution
3399 on, then we are OK. */
3400 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3401 return 0;
3402 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3403 SET_SRC (PATTERN (insn))))
3404 return 0;
3407 *addr_this_reg = this_reg;
3410 return number_of_reaching_defs;
3413 /* The expression computed by INSN is available and the substitution is legal,
3414 so try to perform the substitution.
3416 The result is nonzero if any changes were made. */
3418 static int
3419 handle_avail_expr (insn, expr)
3420 rtx insn;
3421 struct expr *expr;
3423 rtx pat, insn_computes_expr, expr_set;
3424 rtx to;
3425 struct reg_set *this_reg;
3426 int found_setting, use_src;
3427 int changed = 0;
3429 /* We only handle the case where one computation of the expression
3430 reaches this instruction. */
3431 insn_computes_expr = computing_insn (expr, insn);
3432 if (insn_computes_expr == NULL)
3433 return 0;
3434 expr_set = single_set (insn_computes_expr);
3435 if (!expr_set)
3436 abort ();
3438 found_setting = 0;
3439 use_src = 0;
3441 /* At this point we know only one computation of EXPR outside of this
3442 block reaches this insn. Now try to find a register that the
3443 expression is computed into. */
3444 if (GET_CODE (SET_SRC (expr_set)) == REG)
3446 /* This is the case when the available expression that reaches
3447 here has already been handled as an available expression. */
3448 unsigned int regnum_for_replacing
3449 = REGNO (SET_SRC (expr_set));
3451 /* If the register was created by GCSE we can't use `reg_set_table',
3452 however we know it's set only once. */
3453 if (regnum_for_replacing >= max_gcse_regno
3454 /* If the register the expression is computed into is set only once,
3455 or only one set reaches this insn, we can use it. */
3456 || (((this_reg = reg_set_table[regnum_for_replacing]),
3457 this_reg->next == NULL)
3458 || can_disregard_other_sets (&this_reg, insn, 0)))
3460 use_src = 1;
3461 found_setting = 1;
3465 if (!found_setting)
3467 unsigned int regnum_for_replacing
3468 = REGNO (SET_DEST (expr_set));
3470 /* This shouldn't happen. */
3471 if (regnum_for_replacing >= max_gcse_regno)
3472 abort ();
3474 this_reg = reg_set_table[regnum_for_replacing];
3476 /* If the register the expression is computed into is set only once,
3477 or only one set reaches this insn, use it. */
3478 if (this_reg->next == NULL
3479 || can_disregard_other_sets (&this_reg, insn, 0))
3480 found_setting = 1;
3483 if (found_setting)
3485 pat = PATTERN (insn);
3486 if (use_src)
3487 to = SET_SRC (expr_set);
3488 else
3489 to = SET_DEST (expr_set);
3490 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3492 /* We should be able to ignore the return code from validate_change but
3493 to play it safe we check. */
3494 if (changed)
3496 gcse_subst_count++;
3497 if (gcse_file != NULL)
3499 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3500 INSN_UID (insn));
3501 fprintf (gcse_file, " reg %d %s insn %d\n",
3502 REGNO (to), use_src ? "from" : "set in",
3503 INSN_UID (insn_computes_expr));
3508 /* The register that the expr is computed into is set more than once. */
3509 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3511 /* Insert an insn after INSNX that copies the reg set in INSNX
3512 into a new pseudo register; call this new register REGN.
3513 From INSNB until the end of the basic block, or until REGB is set,
3514 replace all uses of REGB with REGN. */
3515 rtx new_insn;
3517 to = gen_reg_rtx (GET_MODE (SET_DEST (expr_set)));
3519 /* Generate the new insn. */
3520 /* ??? If the change fails, we return 0, even though we created
3521 an insn. I think this is ok. */
3522 new_insn
3523 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3524 SET_DEST (expr_set)),
3525 insn_computes_expr);
3527 /* Keep register set table up to date. */
3528 record_one_set (REGNO (to), new_insn);
3530 gcse_create_count++;
3531 if (gcse_file != NULL)
3533 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3534 INSN_UID (NEXT_INSN (insn_computes_expr)),
3535 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3536 fprintf (gcse_file, ", computed in insn %d,\n",
3537 INSN_UID (insn_computes_expr));
3538 fprintf (gcse_file, " into newly allocated reg %d\n",
3539 REGNO (to));
3542 pat = PATTERN (insn);
3544 /* Do register replacement for INSN. */
3545 changed = validate_change (insn, &SET_SRC (pat),
3546 SET_DEST (PATTERN
3547 (NEXT_INSN (insn_computes_expr))),
3550 /* We should be able to ignore the return code from validate_change but
3551 to play it safe we check. */
3552 if (changed)
3554 gcse_subst_count++;
3555 if (gcse_file != NULL)
3557 fprintf (gcse_file,
3558 "GCSE: Replacing the source in insn %d with reg %d ",
3559 INSN_UID (insn),
3560 REGNO (SET_DEST (PATTERN (NEXT_INSN
3561 (insn_computes_expr)))));
3562 fprintf (gcse_file, "set in insn %d\n",
3563 INSN_UID (insn_computes_expr));
3568 return changed;
3571 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3572 the dataflow analysis has been done.
3574 The result is nonzero if a change was made. */
3576 static int
3577 classic_gcse ()
3579 int changed;
3580 rtx insn;
3581 basic_block bb;
3583 /* Note we start at block 1. */
3585 if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
3586 return 0;
3588 changed = 0;
3589 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb, EXIT_BLOCK_PTR, next_bb)
3591 /* Reset tables used to keep track of what's still valid [since the
3592 start of the block]. */
3593 reset_opr_set_tables ();
3595 for (insn = bb->head;
3596 insn != NULL && insn != NEXT_INSN (bb->end);
3597 insn = NEXT_INSN (insn))
3599 /* Is insn of form (set (pseudo-reg) ...)? */
3600 if (GET_CODE (insn) == INSN
3601 && GET_CODE (PATTERN (insn)) == SET
3602 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3603 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3605 rtx pat = PATTERN (insn);
3606 rtx src = SET_SRC (pat);
3607 struct expr *expr;
3609 if (want_to_gcse_p (src)
3610 /* Is the expression recorded? */
3611 && ((expr = lookup_expr (src, &expr_hash_table)) != NULL)
3612 /* Is the expression available [at the start of the
3613 block]? */
3614 && TEST_BIT (ae_in[bb->index], expr->bitmap_index)
3615 /* Are the operands unchanged since the start of the
3616 block? */
3617 && oprs_not_set_p (src, insn))
3618 changed |= handle_avail_expr (insn, expr);
3621 /* Keep track of everything modified by this insn. */
3622 /* ??? Need to be careful w.r.t. mods done to INSN. */
3623 if (INSN_P (insn))
3624 mark_oprs_set (insn);
3628 return changed;
3631 /* Top level routine to perform one classic GCSE pass.
3633 Return nonzero if a change was made. */
3635 static int
3636 one_classic_gcse_pass (pass)
3637 int pass;
3639 int changed = 0;
3641 gcse_subst_count = 0;
3642 gcse_create_count = 0;
3644 alloc_hash_table (max_cuid, &expr_hash_table, 0);
3645 alloc_rd_mem (last_basic_block, max_cuid);
3646 compute_hash_table (&expr_hash_table);
3647 if (gcse_file)
3648 dump_hash_table (gcse_file, "Expression", &expr_hash_table);
3650 if (expr_hash_table.n_elems > 0)
3652 compute_kill_rd ();
3653 compute_rd ();
3654 alloc_avail_expr_mem (last_basic_block, expr_hash_table.n_elems);
3655 compute_ae_gen (&expr_hash_table);
3656 compute_ae_kill (ae_gen, ae_kill, &expr_hash_table);
3657 compute_available (ae_gen, ae_kill, ae_out, ae_in);
3658 changed = classic_gcse ();
3659 free_avail_expr_mem ();
3662 free_rd_mem ();
3663 free_hash_table (&expr_hash_table);
3665 if (gcse_file)
3667 fprintf (gcse_file, "\n");
3668 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3669 current_function_name, pass, bytes_used, gcse_subst_count);
3670 fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3673 return changed;
3676 /* Compute copy/constant propagation working variables. */
3678 /* Local properties of assignments. */
3679 static sbitmap *cprop_pavloc;
3680 static sbitmap *cprop_absaltered;
3682 /* Global properties of assignments (computed from the local properties). */
3683 static sbitmap *cprop_avin;
3684 static sbitmap *cprop_avout;
3686 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3687 basic blocks. N_SETS is the number of sets. */
3689 static void
3690 alloc_cprop_mem (n_blocks, n_sets)
3691 int n_blocks, n_sets;
3693 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3694 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3696 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3697 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3700 /* Free vars used by copy/const propagation. */
3702 static void
3703 free_cprop_mem ()
3705 sbitmap_vector_free (cprop_pavloc);
3706 sbitmap_vector_free (cprop_absaltered);
3707 sbitmap_vector_free (cprop_avin);
3708 sbitmap_vector_free (cprop_avout);
3711 /* For each block, compute whether X is transparent. X is either an
3712 expression or an assignment [though we don't care which, for this context
3713 an assignment is treated as an expression]. For each block where an
3714 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3715 bit in BMAP. */
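/* For example (illustrative): given X = (plus:SI (reg 100) (mem Y)), the
   code below visits every block recorded as setting reg 100 and every block
   containing a call or a store that may alias Y, setting the INDX bit in
   BMAP when SET_P is 1 and resetting it when SET_P is 0.  */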
3717 static void
3718 compute_transp (x, indx, bmap, set_p)
3719 rtx x;
3720 int indx;
3721 sbitmap *bmap;
3722 int set_p;
3724 int i, j;
3725 basic_block bb;
3726 enum rtx_code code;
3727 reg_set *r;
3728 const char *fmt;
3730 /* repeat is used to turn tail-recursion into iteration since GCC
3731 can't do it when there's no return value. */
3732 repeat:
3734 if (x == 0)
3735 return;
3737 code = GET_CODE (x);
3738 switch (code)
3740 case REG:
3741 if (set_p)
3743 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3745 FOR_EACH_BB (bb)
3746 if (TEST_BIT (reg_set_in_block[bb->index], REGNO (x)))
3747 SET_BIT (bmap[bb->index], indx);
3749 else
3751 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3752 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3755 else
3757 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3759 FOR_EACH_BB (bb)
3760 if (TEST_BIT (reg_set_in_block[bb->index], REGNO (x)))
3761 RESET_BIT (bmap[bb->index], indx);
3763 else
3765 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3766 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3770 return;
3772 case MEM:
3773 FOR_EACH_BB (bb)
3775 rtx list_entry = canon_modify_mem_list[bb->index];
3777 while (list_entry)
3779 rtx dest, dest_addr;
3781 if (GET_CODE (XEXP (list_entry, 0)) == CALL_INSN)
3783 if (set_p)
3784 SET_BIT (bmap[bb->index], indx);
3785 else
3786 RESET_BIT (bmap[bb->index], indx);
3787 break;
3789 /* LIST_ENTRY must be an INSN of some kind that sets memory.
3790 Examine each hunk of memory that is modified. */
3792 dest = XEXP (list_entry, 0);
3793 list_entry = XEXP (list_entry, 1);
3794 dest_addr = XEXP (list_entry, 0);
3796 if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
3797 x, rtx_addr_varies_p))
3799 if (set_p)
3800 SET_BIT (bmap[bb->index], indx);
3801 else
3802 RESET_BIT (bmap[bb->index], indx);
3803 break;
3805 list_entry = XEXP (list_entry, 1);
3809 x = XEXP (x, 0);
3810 goto repeat;
3812 case PC:
3813 case CC0: /*FIXME*/
3814 case CONST:
3815 case CONST_INT:
3816 case CONST_DOUBLE:
3817 case CONST_VECTOR:
3818 case SYMBOL_REF:
3819 case LABEL_REF:
3820 case ADDR_VEC:
3821 case ADDR_DIFF_VEC:
3822 return;
3824 default:
3825 break;
3828 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3830 if (fmt[i] == 'e')
3832 /* If we are about to do the last recursive call
3833 needed at this level, change it into iteration.
3834 This function is called enough to be worth it. */
3835 if (i == 0)
3837 x = XEXP (x, i);
3838 goto repeat;
3841 compute_transp (XEXP (x, i), indx, bmap, set_p);
3843 else if (fmt[i] == 'E')
3844 for (j = 0; j < XVECLEN (x, i); j++)
3845 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3849 /* Top level routine to do the dataflow analysis needed by copy/const
3850 propagation. */
3852 static void
3853 compute_cprop_data ()
3855 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, &set_hash_table);
3856 compute_available (cprop_pavloc, cprop_absaltered,
3857 cprop_avout, cprop_avin);
3860 /* Copy/constant propagation. */
3862 /* Maximum number of register uses in an insn that we handle. */
3863 #define MAX_USES 8
3865 /* Table of uses found in an insn.
3866 Allocated statically to avoid alloc/free complexity and overhead. */
3867 static struct reg_use reg_use_table[MAX_USES];
3869 /* Index into `reg_use_table' while building it. */
3870 static int reg_use_count;
3872 /* Set up a list of register numbers used in INSN. The found uses are stored
3873 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3874 and contains the number of uses in the table upon exit.
3876 ??? If a register appears multiple times we will record it multiple times.
3877 This doesn't hurt anything but it will slow things down. */
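/* For instance (illustrative only, register numbers made up): for the insn
   (set (reg 70) (plus:SI (reg 65) (reg 66)))
   note_uses hands us the SET_SRC, so (reg 65) and (reg 66) end up in
   reg_use_table; the destination (reg 70) is not a use and is not
   recorded.  */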
3879 static void
3880 find_used_regs (xptr, data)
3881 rtx *xptr;
3882 void *data ATTRIBUTE_UNUSED;
3884 int i, j;
3885 enum rtx_code code;
3886 const char *fmt;
3887 rtx x = *xptr;
3889 /* repeat is used to turn tail-recursion into iteration since GCC
3890 can't do it when there's no return value. */
3891 repeat:
3892 if (x == 0)
3893 return;
3895 code = GET_CODE (x);
3896 if (REG_P (x))
3898 if (reg_use_count == MAX_USES)
3899 return;
3901 reg_use_table[reg_use_count].reg_rtx = x;
3902 reg_use_count++;
3905 /* Recursively scan the operands of this expression. */
3907 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3909 if (fmt[i] == 'e')
3911 /* If we are about to do the last recursive call
3912 needed at this level, change it into iteration.
3913 This function is called enough to be worth it. */
3914 if (i == 0)
3916 x = XEXP (x, 0);
3917 goto repeat;
3920 find_used_regs (&XEXP (x, i), data);
3922 else if (fmt[i] == 'E')
3923 for (j = 0; j < XVECLEN (x, i); j++)
3924 find_used_regs (&XVECEXP (x, i, j), data);
3928 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3929 Returns nonzero if successful. */
3931 static int
3932 try_replace_reg (from, to, insn)
3933 rtx from, to, insn;
3935 rtx note = find_reg_equal_equiv_note (insn);
3936 rtx src = 0;
3937 int success = 0;
3938 rtx set = single_set (insn);
3940 validate_replace_src_group (from, to, insn);
3941 if (num_changes_pending () && apply_change_group ())
3942 success = 1;
3944 if (!success && set && reg_mentioned_p (from, SET_SRC (set)))
3946 /* If the above failed and this is a single set, try to simplify the source of
3947 the set given our substitution. We could perhaps try this for multiple
3948 SETs, but it probably won't buy us anything. */
3949 src = simplify_replace_rtx (SET_SRC (set), from, to);
3951 if (!rtx_equal_p (src, SET_SRC (set))
3952 && validate_change (insn, &SET_SRC (set), src, 0))
3953 success = 1;
3955 /* If we've failed to do replacement, have a single SET, and don't already
3956 have a note, add a REG_EQUAL note to not lose information. */
3957 if (!success && note == 0 && set != 0)
3958 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
3961 /* If there is already a NOTE, update the expression in it with our
3962 replacement. */
3963 else if (note != 0)
3964 XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), from, to);
3966 /* REG_EQUAL may get simplified into a register.
3967 We don't allow that; remove the note. This ought
3968 not to happen, because previous code ought to have synthesized a
3969 reg-reg move, but be on the safe side. */
3970 if (note && REG_P (XEXP (note, 0)))
3971 remove_note (insn, note);
3973 return success;
3976 /* Find a set of REGNOs that are available on entry to INSN's block. Returns
3977 NULL if no such set is found. */
3979 static struct expr *
3980 find_avail_set (regno, insn)
3981 int regno;
3982 rtx insn;
3984 /* SET1 contains the last set found that can be returned to the caller for
3985 use in a substitution. */
3986 struct expr *set1 = 0;
3988 /* Loops are not possible here. To get a loop we would need two sets
3989 available at the start of the block containing INSN, i.e. we would
3990 need two sets like this available at the start of the block:
3992 (set (reg X) (reg Y))
3993 (set (reg Y) (reg X))
3995 This cannot happen since the set of (reg Y) would have killed the
3996 set of (reg X) making it unavailable at the start of this block. */
3997 while (1)
3999 rtx src;
4000 struct expr *set = lookup_set (regno, NULL_RTX, &set_hash_table);
4002 /* Find a set that is available at the start of the block
4003 which contains INSN. */
4004 while (set)
4006 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
4007 break;
4008 set = next_set (regno, set);
4011 /* If no available set was found we've reached the end of the
4012 (possibly empty) copy chain. */
4013 if (set == 0)
4014 break;
4016 if (GET_CODE (set->expr) != SET)
4017 abort ();
4019 src = SET_SRC (set->expr);
4021 /* We know the set is available.
4022 Now check that SRC is ANTLOC (i.e. none of the source operands
4023 have changed since the start of the block).
4025 If the source operand changed, we may still use it for the next
4026 iteration of this loop, but we may not use it for substitutions. */
4028 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
4029 set1 = set;
4031 /* If the source of the set is anything except a register, then
4032 we have reached the end of the copy chain. */
4033 if (GET_CODE (src) != REG)
4034 break;
4036 /* Follow the copy chain, i.e. start another iteration of the loop
4037 and see if we have an available copy into SRC. */
4038 regno = REGNO (src);
4041 /* SET1 holds the last set that was available and anticipatable at
4042 INSN. */
4043 return set1;
4046 /* Subroutine of cprop_insn that tries to propagate constants into
4047 JUMP_INSNS. JUMP must be a conditional jump. If SETCC is non-NULL
4048 it is the instruction that immediately precedes JUMP, and must be a
4049 single SET of a register. FROM is what we will try to replace,
4050 SRC is the constant we will try to substitute for it. Returns nonzero
4051 if a change was made. */
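/* A hypothetical example (register numbers made up): with SETCC
   (set (reg 100) (lt:SI (reg 65) (const_int 0)))
   and JUMP testing (ne (reg 100) (const_int 0)), propagating
   FROM = (reg 65), SRC = (const_int 7) would typically let the
   condition fold to false, so the whole SET_SRC simplifies to (pc)
   and the now-dead jump is deleted.  */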
4053 static int
4054 cprop_jump (bb, setcc, jump, from, src)
4055 basic_block bb;
4056 rtx setcc;
4057 rtx jump;
4058 rtx from;
4059 rtx src;
4061 rtx new, new_set;
4062 rtx set = pc_set (jump);
4064 /* First substitute the SETCC condition into the SET_SRC of the JUMP,
4065 then substitute the given value (FROM -> SRC) into this expanded JUMP. */
4066 if (setcc != NULL
4067 && !modified_between_p (from, setcc, jump)
4068 && !modified_between_p (src, setcc, jump))
4070 rtx setcc_set = single_set (setcc);
4071 new_set = simplify_replace_rtx (SET_SRC (set),
4072 SET_DEST (setcc_set),
4073 SET_SRC (setcc_set));
4075 else
4076 new_set = set;
4078 new = simplify_replace_rtx (new_set, from, src);
4080 /* If no simplification can be made, then try the next
4081 register. */
4082 if (rtx_equal_p (new, new_set) || rtx_equal_p (new, SET_SRC (set)))
4083 return 0;
4085 /* If this is now a no-op delete it, otherwise this must be a valid insn. */
4086 if (new == pc_rtx)
4087 delete_insn (jump);
4088 else
4090 /* Ensure the value computed inside the jump insn is equivalent
4091 to the one computed by setcc. */
4092 if (setcc
4093 && modified_in_p (new, setcc))
4094 return 0;
4095 if (! validate_change (jump, &SET_SRC (set), new, 0))
4096 return 0;
4098 /* If this has turned into an unconditional jump,
4099 then put a barrier after it so that the unreachable
4100 code will be deleted. */
4101 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
4102 emit_barrier_after (jump);
4105 #ifdef HAVE_cc0
4106 /* Delete the cc0 setter. */
4107 if (setcc != NULL && CC0_P (SET_DEST (single_set (setcc))))
4108 delete_insn (setcc);
4109 #endif
4111 run_jump_opt_after_gcse = 1;
4113 const_prop_count++;
4114 if (gcse_file != NULL)
4116 fprintf (gcse_file,
4117 "CONST-PROP: Replacing reg %d in jump_insn %d with constant ",
4118 REGNO (from), INSN_UID (jump));
4119 print_rtl (gcse_file, src);
4120 fprintf (gcse_file, "\n");
4122 purge_dead_edges (bb);
4124 return 1;
4127 static bool
4128 constprop_register (insn, from, to, alter_jumps)
4129 rtx insn;
4130 rtx from;
4131 rtx to;
4132 int alter_jumps;
4134 rtx sset;
4136 /* Check for reg or cc0 setting instructions followed by
4137 conditional branch instructions first. */
4138 if (alter_jumps
4139 && (sset = single_set (insn)) != NULL
4140 && NEXT_INSN (insn)
4141 && any_condjump_p (NEXT_INSN (insn)) && onlyjump_p (NEXT_INSN (insn)))
4143 rtx dest = SET_DEST (sset);
4144 if ((REG_P (dest) || CC0_P (dest))
4145 && cprop_jump (BLOCK_FOR_INSN (insn), insn, NEXT_INSN (insn), from, to))
4146 return 1;
4149 /* Handle normal insns next. */
4150 if (GET_CODE (insn) == INSN
4151 && try_replace_reg (from, to, insn))
4152 return 1;
4154 /* Try to propagate a CONST_INT into a conditional jump.
4155 We're pretty specific about what we will handle in this
4156 code; we can extend this as necessary over time.
4158 Right now the insn in question must look like
4159 (set (pc) (if_then_else ...)) */
4160 else if (alter_jumps && any_condjump_p (insn) && onlyjump_p (insn))
4161 return cprop_jump (BLOCK_FOR_INSN (insn), NULL, insn, from, to);
4162 return 0;
4165 /* Perform constant and copy propagation on INSN.
4166 The result is nonzero if a change was made. */
4168 static int
4169 cprop_insn (insn, alter_jumps)
4170 rtx insn;
4171 int alter_jumps;
4173 struct reg_use *reg_used;
4174 int changed = 0;
4175 rtx note;
4177 if (!INSN_P (insn))
4178 return 0;
4180 reg_use_count = 0;
4181 note_uses (&PATTERN (insn), find_used_regs, NULL);
4183 note = find_reg_equal_equiv_note (insn);
4185 /* We may win even when propagating constants into notes. */
4186 if (note)
4187 find_used_regs (&XEXP (note, 0), NULL);
4189 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
4190 reg_used++, reg_use_count--)
4192 unsigned int regno = REGNO (reg_used->reg_rtx);
4193 rtx pat, src;
4194 struct expr *set;
4196 /* Ignore registers created by GCSE.
4197 We do this because ... */
4198 if (regno >= max_gcse_regno)
4199 continue;
4201 /* If the register has already been set in this block, there's
4202 nothing we can do. */
4203 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
4204 continue;
4206 /* Find an assignment that sets reg_used and is available
4207 at the start of the block. */
4208 set = find_avail_set (regno, insn);
4209 if (! set)
4210 continue;
4212 pat = set->expr;
4213 /* ??? We might be able to handle PARALLELs. Later. */
4214 if (GET_CODE (pat) != SET)
4215 abort ();
4217 src = SET_SRC (pat);
4219 /* Constant propagation. */
4220 if (CONSTANT_P (src))
4222 if (constprop_register (insn, reg_used->reg_rtx, src, alter_jumps))
4224 changed = 1;
4225 const_prop_count++;
4226 if (gcse_file != NULL)
4228 fprintf (gcse_file, "GLOBAL CONST-PROP: Replacing reg %d in ", regno);
4229 fprintf (gcse_file, "insn %d with constant ", INSN_UID (insn));
4230 print_rtl (gcse_file, src);
4231 fprintf (gcse_file, "\n");
4235 else if (GET_CODE (src) == REG
4236 && REGNO (src) >= FIRST_PSEUDO_REGISTER
4237 && REGNO (src) != regno)
4239 if (try_replace_reg (reg_used->reg_rtx, src, insn))
4241 changed = 1;
4242 copy_prop_count++;
4243 if (gcse_file != NULL)
4245 fprintf (gcse_file, "GLOBAL COPY-PROP: Replacing reg %d in insn %d",
4246 regno, INSN_UID (insn));
4247 fprintf (gcse_file, " with reg %d\n", REGNO (src));
4250 /* The original insn setting reg_used may or may not now be
4251 deletable. We leave the deletion to flow. */
4252 /* FIXME: If it turns out that the insn isn't deletable,
4253 then we may have unnecessarily extended register lifetimes
4254 and made things worse. */
4259 return changed;
4262 /* Like find_used_regs, but avoid recording uses that appear in
4263 input-output contexts such as zero_extract or pre_dec. This
4264 restricts the cases we consider to those for which local cprop
4265 can legitimately make replacements. */
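/* E.g. (purely illustrative) in
   (set (zero_extract:SI (reg 65) (const_int 8) (const_int 0)) (reg 66))
   the (reg 65) inside the ZERO_EXTRACT is both read and written, so we
   must not record it as a plain use that local cprop could replace;
   the switch below therefore bails out of such contexts.  */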
4267 static void
4268 local_cprop_find_used_regs (xptr, data)
4269 rtx *xptr;
4270 void *data;
4272 rtx x = *xptr;
4274 if (x == 0)
4275 return;
4277 switch (GET_CODE (x))
4279 case ZERO_EXTRACT:
4280 case SIGN_EXTRACT:
4281 case STRICT_LOW_PART:
4282 return;
4284 case PRE_DEC:
4285 case PRE_INC:
4286 case POST_DEC:
4287 case POST_INC:
4288 case PRE_MODIFY:
4289 case POST_MODIFY:
4290 /* Can only legitimately appear this early in the context of
4291 stack pushes for function arguments, but handle all of the
4292 codes nonetheless. */
4293 return;
4295 case SUBREG:
4296 /* Setting a subreg of a register larger than word_mode leaves
4297 the non-written words unchanged. */
4298 if (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))) > BITS_PER_WORD)
4299 return;
4300 break;
4302 default:
4303 break;
4306 find_used_regs (xptr, data);
4309 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
4310 their REG_EQUAL notes need updating. */
4312 static bool
4313 do_local_cprop (x, insn, alter_jumps, libcall_sp)
4314 rtx x;
4315 rtx insn;
4316 int alter_jumps;
4317 rtx *libcall_sp;
4319 rtx newreg = NULL, newcnst = NULL;
4321 /* Rule out USE instructions and ASM statements as we don't want to
4322 change the hard registers mentioned. */
4323 if (GET_CODE (x) == REG
4324 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
4325 || (GET_CODE (PATTERN (insn)) != USE
4326 && asm_noperands (PATTERN (insn)) < 0)))
4328 cselib_val *val = cselib_lookup (x, GET_MODE (x), 0);
4329 struct elt_loc_list *l;
4331 if (!val)
4332 return false;
4333 for (l = val->locs; l; l = l->next)
4335 rtx this_rtx = l->loc;
4336 rtx note;
4338 if (l->in_libcall)
4339 continue;
4341 if (CONSTANT_P (this_rtx)
4342 && GET_CODE (this_rtx) != CONSTANT_P_RTX)
4343 newcnst = this_rtx;
4344 if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER
4345 /* Don't copy propagate if it has attached REG_EQUIV note.
4346 At this point only function parameters should have
4347 REG_EQUIV notes and if the argument slot is used somewhere
4348 explicitly, it means address of parameter has been taken,
4349 so we should not extend the lifetime of the pseudo. */
4350 && (!(note = find_reg_note (l->setting_insn, REG_EQUIV, NULL_RTX))
4351 || GET_CODE (XEXP (note, 0)) != MEM))
4352 newreg = this_rtx;
4354 if (newcnst && constprop_register (insn, x, newcnst, alter_jumps))
4356 /* If we find a case where we can't make the retval REG_EQUAL notes
4357 match the new register, we either have to abandon this replacement
4358 or fix delete_trivially_dead_insns to preserve the setting insn,
4359 or make it delete the REG_EQUAL note, and fix up all passes that
4360 require the REG_EQUAL note there. */
4361 if (!adjust_libcall_notes (x, newcnst, insn, libcall_sp))
4362 abort ();
4363 if (gcse_file != NULL)
4365 fprintf (gcse_file, "LOCAL CONST-PROP: Replacing reg %d in ",
4366 REGNO (x));
4367 fprintf (gcse_file, "insn %d with constant ",
4368 INSN_UID (insn));
4369 print_rtl (gcse_file, newcnst);
4370 fprintf (gcse_file, "\n");
4372 const_prop_count++;
4373 return true;
4375 else if (newreg && newreg != x && try_replace_reg (x, newreg, insn))
4377 adjust_libcall_notes (x, newreg, insn, libcall_sp);
4378 if (gcse_file != NULL)
4380 fprintf (gcse_file,
4381 "LOCAL COPY-PROP: Replacing reg %d in insn %d",
4382 REGNO (x), INSN_UID (insn));
4383 fprintf (gcse_file, " with reg %d\n", REGNO (newreg));
4385 copy_prop_count++;
4386 return true;
4389 return false;
4392 /* LIBCALL_SP is a zero-terminated array of insns at the end of a libcall;
4393 their REG_EQUAL notes need updating to reflect that OLDREG has been
4394 replaced with NEWVAL in INSN. Return true if all substitutions could
4395 be made. */
4396 static bool
4397 adjust_libcall_notes (oldreg, newval, insn, libcall_sp)
4398 rtx oldreg, newval, insn, *libcall_sp;
4400 rtx end;
4402 while ((end = *libcall_sp++))
4404 rtx note = find_reg_equal_equiv_note (end);
4406 if (! note)
4407 continue;
4409 if (REG_P (newval))
4411 if (reg_set_between_p (newval, PREV_INSN (insn), end))
4415 note = find_reg_equal_equiv_note (end);
4416 if (! note)
4417 continue;
4418 if (reg_mentioned_p (newval, XEXP (note, 0)))
4419 return false;
4421 while ((end = *libcall_sp++));
4422 return true;
4425 XEXP (note, 0) = replace_rtx (XEXP (note, 0), oldreg, newval);
4426 insn = end;
4428 return true;
4431 #define MAX_NESTED_LIBCALLS 9
4433 static void
4434 local_cprop_pass (alter_jumps)
4435 int alter_jumps;
4437 rtx insn;
4438 struct reg_use *reg_used;
4439 rtx libcall_stack[MAX_NESTED_LIBCALLS + 1], *libcall_sp;
4440 bool changed = false;
4442 cselib_init ();
4443 libcall_sp = &libcall_stack[MAX_NESTED_LIBCALLS];
4444 *libcall_sp = 0;
4445 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4447 if (INSN_P (insn))
4449 rtx note = find_reg_note (insn, REG_LIBCALL, NULL_RTX);
4451 if (note)
4453 if (libcall_sp == libcall_stack)
4454 abort ();
4455 *--libcall_sp = XEXP (note, 0);
4457 note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
4458 if (note)
4459 libcall_sp++;
4460 note = find_reg_equal_equiv_note (insn);
4463 reg_use_count = 0;
4464 note_uses (&PATTERN (insn), local_cprop_find_used_regs, NULL);
4465 if (note)
4466 local_cprop_find_used_regs (&XEXP (note, 0), NULL);
4468 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
4469 reg_used++, reg_use_count--)
4470 if (do_local_cprop (reg_used->reg_rtx, insn, alter_jumps,
4471 libcall_sp))
4473 changed = true;
4474 break;
4477 while (reg_use_count);
4479 cselib_process_insn (insn);
4481 cselib_finish ();
4482 /* Global analysis may get into infinite loops for unreachable blocks. */
4483 if (changed && alter_jumps)
4485 delete_unreachable_blocks ();
4486 free_reg_set_mem ();
4487 alloc_reg_set_mem (max_reg_num ());
4488 compute_sets (get_insns ());
4492 /* Forward propagate copies. This includes copies and constants. Return
4493 nonzero if a change was made. */
4495 static int
4496 cprop (alter_jumps)
4497 int alter_jumps;
4499 int changed;
4500 basic_block bb;
4501 rtx insn;
4503 /* Note we start at block 1. */
4504 if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
4506 if (gcse_file != NULL)
4507 fprintf (gcse_file, "\n");
4508 return 0;
4511 changed = 0;
4512 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb, EXIT_BLOCK_PTR, next_bb)
4514 /* Reset tables used to keep track of what's still valid [since the
4515 start of the block]. */
4516 reset_opr_set_tables ();
4518 for (insn = bb->head;
4519 insn != NULL && insn != NEXT_INSN (bb->end);
4520 insn = NEXT_INSN (insn))
4521 if (INSN_P (insn))
4523 changed |= cprop_insn (insn, alter_jumps);
4525 /* Keep track of everything modified by this insn. */
4526 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4527 call mark_oprs_set if we turned the insn into a NOTE. */
4528 if (GET_CODE (insn) != NOTE)
4529 mark_oprs_set (insn);
4533 if (gcse_file != NULL)
4534 fprintf (gcse_file, "\n");
4536 return changed;
4539 /* Similar to get_condition, only the resulting condition must be
4540 valid at JUMP, instead of at EARLIEST.
4542 This differs from noce_get_condition in ifcvt.c in that we prefer not to
4543 settle for the condition variable in the jump instruction being integral.
4544 We prefer to be able to record the value of a user variable, rather than
4545 the value of a temporary used in a condition. This could be solved by
4546 recording the value of *every* register scanned by canonicalize_condition,
4547 but this would require some code reorganization. */
4549 static rtx
4550 fis_get_condition (jump)
4551 rtx jump;
4553 rtx cond, set, tmp, insn, earliest;
4554 bool reverse;
4556 if (! any_condjump_p (jump))
4557 return NULL_RTX;
4559 set = pc_set (jump);
4560 cond = XEXP (SET_SRC (set), 0);
4562 /* If this branches to JUMP_LABEL when the condition is false,
4563 reverse the condition. */
4564 reverse = (GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
4565 && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (jump));
4567 /* Use canonicalize_condition to do the dirty work of manipulating
4568 MODE_CC values and COMPARE rtx codes. */
4569 tmp = canonicalize_condition (jump, cond, reverse, &earliest, NULL_RTX);
4570 if (!tmp)
4571 return NULL_RTX;
4573 /* Verify that the given condition is valid at JUMP by virtue of not
4574 having been modified since EARLIEST. */
4575 for (insn = earliest; insn != jump; insn = NEXT_INSN (insn))
4576 if (INSN_P (insn) && modified_in_p (tmp, insn))
4577 break;
4578 if (insn == jump)
4579 return tmp;
4581 /* The condition was modified. See if we can get a partial result
4582 that doesn't follow all the reversals. Perhaps combine can fold
4583 them together later. */
4584 tmp = XEXP (tmp, 0);
4585 if (!REG_P (tmp) || GET_MODE_CLASS (GET_MODE (tmp)) != MODE_INT)
4586 return NULL_RTX;
4587 tmp = canonicalize_condition (jump, cond, reverse, &earliest, tmp);
4588 if (!tmp)
4589 return NULL_RTX;
4591 /* For sanity's sake, re-validate the new result. */
4592 for (insn = earliest; insn != jump; insn = NEXT_INSN (insn))
4593 if (INSN_P (insn) && modified_in_p (tmp, insn))
4594 return NULL_RTX;
4596 return tmp;
4599 /* Find the implicit sets of a function. An "implicit set" is a constraint
4600 on the value of a variable, implied by a conditional jump. For example,
4601 following "if (x == 2)", the then branch may be optimized as though the
4602 conditional performed an "explicit set", in this example, "x = 2". This
4603 function records the set patterns that are implicit at the start of each
4604 basic block. */
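/* Continuing the example above in illustrative (made-up) RTL: for a jump
   whose taken-condition is (eq (reg 65) (const_int 2)), the branch
   target block gets implicit_sets[bb->index] = (set (reg 65)
   (const_int 2)), provided that block has no other predecessors.  */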
4606 static void
4607 find_implicit_sets ()
4609 basic_block bb, dest;
4610 unsigned int count;
4611 rtx cond, new;
4613 count = 0;
4614 FOR_EACH_BB (bb)
4615 /* Check for more than one successor. */
4616 if (bb->succ && bb->succ->succ_next)
4618 cond = fis_get_condition (bb->end);
4620 if (cond
4621 && (GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
4622 && GET_CODE (XEXP (cond, 0)) == REG
4623 && REGNO (XEXP (cond, 0)) >= FIRST_PSEUDO_REGISTER
4624 && CONSTANT_P (XEXP (cond, 1)))
4626 dest = GET_CODE (cond) == EQ ? BRANCH_EDGE (bb)->dest
4627 : FALLTHRU_EDGE (bb)->dest;
4629 if (dest && ! dest->pred->pred_next
4630 && dest != EXIT_BLOCK_PTR)
4632 new = gen_rtx_SET (VOIDmode, XEXP (cond, 0),
4633 XEXP (cond, 1));
4634 implicit_sets[dest->index] = new;
4635 if (gcse_file)
4637 fprintf (gcse_file, "Implicit set of reg %d in ",
4638 REGNO (XEXP (cond, 0)));
4639 fprintf (gcse_file, "basic block %d\n", dest->index);
4641 count++;
4646 if (gcse_file)
4647 fprintf (gcse_file, "Found %d implicit sets\n", count);
4650 /* Perform one copy/constant propagation pass.
4651 PASS is the pass count. If CPROP_JUMPS is true, perform constant
4652 propagation into conditional jumps. If BYPASS_JUMPS is true,
4653 perform conditional jump bypassing optimizations. */
4655 static int
4656 one_cprop_pass (pass, cprop_jumps, bypass_jumps)
4657 int pass;
4658 int cprop_jumps;
4659 int bypass_jumps;
4661 int changed = 0;
4663 const_prop_count = 0;
4664 copy_prop_count = 0;
4666 local_cprop_pass (cprop_jumps);
4668 /* Determine implicit sets. */
4669 implicit_sets = (rtx *) xcalloc (last_basic_block, sizeof (rtx));
4670 find_implicit_sets ();
4672 alloc_hash_table (max_cuid, &set_hash_table, 1);
4673 compute_hash_table (&set_hash_table);
4675 /* Free implicit_sets before peak usage. */
4676 free (implicit_sets);
4677 implicit_sets = NULL;
4679 if (gcse_file)
4680 dump_hash_table (gcse_file, "SET", &set_hash_table);
4681 if (set_hash_table.n_elems > 0)
4683 alloc_cprop_mem (last_basic_block, set_hash_table.n_elems);
4684 compute_cprop_data ();
4685 changed = cprop (cprop_jumps);
4686 if (bypass_jumps)
4687 changed |= bypass_conditional_jumps ();
4688 free_cprop_mem ();
4691 free_hash_table (&set_hash_table);
4693 if (gcse_file)
4695 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4696 current_function_name, pass, bytes_used);
4697 fprintf (gcse_file, "%d const props, %d copy props\n\n",
4698 const_prop_count, copy_prop_count);
4700 /* Global analysis may get into infinite loops for unreachable blocks. */
4701 if (changed && cprop_jumps)
4702 delete_unreachable_blocks ();
4704 return changed;
4707 /* Bypass conditional jumps. */
4709 /* The value of last_basic_block at the beginning of the jump_bypass
4710 pass. The use of redirect_edge_and_branch_force may introduce new
4711 basic blocks, but the data flow analysis is only valid for basic
4712 block indices less than bypass_last_basic_block. */
4714 static int bypass_last_basic_block;
4716 /* Find a set of REGNO to a constant that is available at the end of basic
4717 block BB. Returns NULL if no such set is found. Based heavily upon
4718 find_avail_set. */
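/* A hypothetical copy chain (register numbers made up): if
   (set (reg 68) (reg 67)) and (set (reg 67) (const_int 5)) are both
   available at the end of BB, then find_bypass_set (68, bb) follows
   the chain through (reg 67) and returns the set whose SET_SRC is
   (const_int 5).  */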
4720 static struct expr *
4721 find_bypass_set (regno, bb)
4722 int regno;
4723 int bb;
4725 struct expr *result = 0;
4727 for (;;)
4729 rtx src;
4730 struct expr *set = lookup_set (regno, NULL_RTX, &set_hash_table);
4732 while (set)
4734 if (TEST_BIT (cprop_avout[bb], set->bitmap_index))
4735 break;
4736 set = next_set (regno, set);
4739 if (set == 0)
4740 break;
4742 if (GET_CODE (set->expr) != SET)
4743 abort ();
4745 src = SET_SRC (set->expr);
4746 if (CONSTANT_P (src))
4747 result = set;
4749 if (GET_CODE (src) != REG)
4750 break;
4752 regno = REGNO (src);
4754 return result;
4758 /* Subroutine of bypass_conditional_jumps that attempts to bypass the given
4759 basic block BB which has more than one predecessor. If not NULL, SETCC
4760 is the first instruction of BB, which is immediately followed by JUMP_INSN
4761 JUMP. Otherwise, SETCC is NULL, and JUMP is the first insn of BB.
4762 Returns nonzero if a change was made. */
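/* Sketch of the intended transformation (hypothetical example): if BB
   only tests (eq (reg 68) (const_int 0)) and one incoming edge comes
   from a block at whose end (reg 68) is known to be (const_int 0),
   that edge can be redirected straight to the branch target, bypassing
   the conditional jump on that path.  */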
4764 static int
4765 bypass_block (bb, setcc, jump)
4766 basic_block bb;
4767 rtx setcc, jump;
4769 rtx insn, note;
4770 edge e, enext;
4771 int i, change;
4773 insn = (setcc != NULL) ? setcc : jump;
4775 /* Determine set of register uses in INSN. */
4776 reg_use_count = 0;
4777 note_uses (&PATTERN (insn), find_used_regs, NULL);
4778 note = find_reg_equal_equiv_note (insn);
4779 if (note)
4780 find_used_regs (&XEXP (note, 0), NULL);
4782 change = 0;
4783 for (e = bb->pred; e; e = enext)
4785 enext = e->pred_next;
4786 if (e->flags & EDGE_COMPLEX)
4787 continue;
4789 /* We can't redirect edges from new basic blocks. */
4790 if (e->src->index >= bypass_last_basic_block)
4791 continue;
4793 for (i = 0; i < reg_use_count; i++)
4795 struct reg_use *reg_used = &reg_use_table[i];
4796 unsigned int regno = REGNO (reg_used->reg_rtx);
4797 basic_block dest, old_dest;
4798 struct expr *set;
4799 rtx src, new;
4801 if (regno >= max_gcse_regno)
4802 continue;
4804 set = find_bypass_set (regno, e->src->index);
4806 if (! set)
4807 continue;
4809 src = SET_SRC (pc_set (jump));
4811 if (setcc != NULL)
4812 src = simplify_replace_rtx (src,
4813 SET_DEST (PATTERN (setcc)),
4814 SET_SRC (PATTERN (setcc)));
4816 new = simplify_replace_rtx (src, reg_used->reg_rtx,
4817 SET_SRC (set->expr));
4819 if (new == pc_rtx)
4820 dest = FALLTHRU_EDGE (bb)->dest;
4821 else if (GET_CODE (new) == LABEL_REF)
4822 dest = BRANCH_EDGE (bb)->dest;
4823 else
4824 dest = NULL;
4826 old_dest = e->dest;
4827 if (dest != NULL
4828 && dest != old_dest
4829 && dest != EXIT_BLOCK_PTR)
4831 redirect_edge_and_branch_force (e, dest);
4833 /* Copy the register setter to the redirected edge.
4834 Don't copy CC0 setters, as CC0 is dead after jump. */
4835 if (setcc)
4837 rtx pat = PATTERN (setcc);
4838 if (!CC0_P (SET_DEST (pat)))
4839 insert_insn_on_edge (copy_insn (pat), e);
4842 if (gcse_file != NULL)
4844 fprintf (gcse_file, "JUMP-BYPASS: Proved reg %d in jump_insn %d equals constant ",
4845 regno, INSN_UID (jump));
4846 print_rtl (gcse_file, SET_SRC (set->expr));
4847 fprintf (gcse_file, "\nBypass edge from %d->%d to %d\n",
4848 e->src->index, old_dest->index, dest->index);
4850 change = 1;
4851 break;
4855 return change;
4858 /* Find basic blocks with more than one predecessor that only contain a
4859 single conditional jump. If the result of the comparison is known at
4860 compile-time from any incoming edge, redirect that edge to the
4861 appropriate target. Returns nonzero if a change was made. */
4863 static int
4864 bypass_conditional_jumps ()
4866 basic_block bb;
4867 int changed;
4868 rtx setcc;
4869 rtx insn;
4870 rtx dest;
4872 /* Note we start at block 1. */
4873 if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
4874 return 0;
4876 bypass_last_basic_block = last_basic_block;
4878 changed = 0;
4879 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb,
4880 EXIT_BLOCK_PTR, next_bb)
4882 /* Check for more than one predecessor. */
4883 if (bb->pred && bb->pred->pred_next)
4885 setcc = NULL_RTX;
4886 for (insn = bb->head;
4887 insn != NULL && insn != NEXT_INSN (bb->end);
4888 insn = NEXT_INSN (insn))
4889 if (GET_CODE (insn) == INSN)
4891 if (setcc)
4892 break;
4893 if (GET_CODE (PATTERN (insn)) != SET)
4894 break;
4896 dest = SET_DEST (PATTERN (insn));
4897 if (REG_P (dest) || CC0_P (dest))
4898 setcc = insn;
4899 else
4900 break;
4902 else if (GET_CODE (insn) == JUMP_INSN)
4904 if (any_condjump_p (insn) && onlyjump_p (insn))
4905 changed |= bypass_block (bb, setcc, insn);
4906 break;
4908 else if (INSN_P (insn))
4909 break;
4913 /* If we bypassed any register setting insns, we inserted a
4914 copy on the redirected edge. These need to be committed. */
4915 if (changed)
4916 commit_edge_insertions();
4918 return changed;
4921 /* Compute PRE+LCM working variables. */
4923 /* Local properties of expressions. */
4924 /* Nonzero for expressions that are transparent in the block. */
4925 static sbitmap *transp;
4927 /* Nonzero for expressions that are transparent at the end of the block.
4928 This is only zero for expressions killed by an abnormal critical edge
4929 created by a call. */
4930 static sbitmap *transpout;
4932 /* Nonzero for expressions that are computed (available) in the block. */
4933 static sbitmap *comp;
4935 /* Nonzero for expressions that are locally anticipatable in the block. */
4936 static sbitmap *antloc;
4938 /* Nonzero for expressions where this block is an optimal computation
4939 point. */
4940 static sbitmap *pre_optimal;
4942 /* Nonzero for expressions which are redundant in a particular block. */
4943 static sbitmap *pre_redundant;
4945 /* Nonzero for expressions which should be inserted on a specific edge. */
4946 static sbitmap *pre_insert_map;
4948 /* Nonzero for expressions which should be deleted in a specific block. */
4949 static sbitmap *pre_delete_map;
4951 /* Contains the edge_list returned by pre_edge_lcm. */
4952 static struct edge_list *edge_list;
4954 /* Redundant insns. */
4955 static sbitmap pre_redundant_insns;
4957 /* Allocate vars used for PRE analysis. */
4959 static void
4960 alloc_pre_mem (n_blocks, n_exprs)
4961 int n_blocks, n_exprs;
4963 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4964 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4965 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4967 pre_optimal = NULL;
4968 pre_redundant = NULL;
4969 pre_insert_map = NULL;
4970 pre_delete_map = NULL;
4971 ae_in = NULL;
4972 ae_out = NULL;
4973 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4975 /* pre_insert and pre_delete are allocated later. */
4978 /* Free vars used for PRE analysis. */
4980 static void
4981 free_pre_mem ()
4983 sbitmap_vector_free (transp);
4984 sbitmap_vector_free (comp);
4986 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
4988 if (pre_optimal)
4989 sbitmap_vector_free (pre_optimal);
4990 if (pre_redundant)
4991 sbitmap_vector_free (pre_redundant);
4992 if (pre_insert_map)
4993 sbitmap_vector_free (pre_insert_map);
4994 if (pre_delete_map)
4995 sbitmap_vector_free (pre_delete_map);
4996 if (ae_in)
4997 sbitmap_vector_free (ae_in);
4998 if (ae_out)
4999 sbitmap_vector_free (ae_out);
5001 transp = comp = NULL;
5002 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
5003 ae_in = ae_out = NULL;
5006 /* Top level routine to do the dataflow analysis needed by PRE. */
5008 static void
5009 compute_pre_data ()
5011 sbitmap trapping_expr;
5012 basic_block bb;
5013 unsigned int ui;
5015 compute_local_properties (transp, comp, antloc, &expr_hash_table);
5016 sbitmap_vector_zero (ae_kill, last_basic_block);
5018 /* Collect expressions which might trap. */
5019 trapping_expr = sbitmap_alloc (expr_hash_table.n_elems);
5020 sbitmap_zero (trapping_expr);
5021 for (ui = 0; ui < expr_hash_table.size; ui++)
5023 struct expr *e;
5024 for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
5025 if (may_trap_p (e->expr))
5026 SET_BIT (trapping_expr, e->bitmap_index);
5029 /* Compute ae_kill for each basic block using:
5031 ~(TRANSP | COMP)
5033 This is significantly faster than compute_ae_kill. */
5035 FOR_EACH_BB (bb)
5037 edge e;
5039 /* If the current block is the destination of an abnormal edge, we
5040 kill all trapping expressions because we won't be able to properly
5041 place the instruction on the edge. So make them neither
5042 anticipatable nor transparent. This is fairly conservative. */
5043 for (e = bb->pred; e ; e = e->pred_next)
5044 if (e->flags & EDGE_ABNORMAL)
5046 sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr);
5047 sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr);
5048 break;
5051 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
5052 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
5055 edge_list = pre_edge_lcm (gcse_file, expr_hash_table.n_elems, transp, comp, antloc,
5056 ae_kill, &pre_insert_map, &pre_delete_map);
5057 sbitmap_vector_free (antloc);
5058 antloc = NULL;
5059 sbitmap_vector_free (ae_kill);
5060 ae_kill = NULL;
5061 sbitmap_free (trapping_expr);
5064 /* PRE utilities */
5066 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
5067 block BB.
5069 VISITED is a pointer to a working buffer for tracking which BB's have
5070 been visited. It is NULL for the top-level call.
5072 We treat reaching expressions that go through blocks containing the same
5073 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
5074 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
5075 2 as not reaching. The intent is to improve the probability of finding
5076 only one reaching expression and to reduce register lifetimes by picking
5077 the closest such expression. */
5079 static int
5080 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
5081 basic_block occr_bb;
5082 struct expr *expr;
5083 basic_block bb;
5084 char *visited;
5086 edge pred;
5088 for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
5090 basic_block pred_bb = pred->src;
5092 if (pred->src == ENTRY_BLOCK_PTR
5093 /* Has this predecessor already been visited? */
5094 || visited[pred_bb->index])
5095 ;/* Nothing to do. */
5097 /* Does this predecessor generate this expression? */
5098 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
5100 /* Is this the occurrence we're looking for?
5101 Note that there's only one generating occurrence per block
5102 so we just need to check the block number. */
5103 if (occr_bb == pred_bb)
5104 return 1;
5106 visited[pred_bb->index] = 1;
5108 /* Ignore this predecessor if it kills the expression. */
5109 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
5110 visited[pred_bb->index] = 1;
5112 /* Neither gen nor kill. */
5113 else
5115 visited[pred_bb->index] = 1;
5116 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
5117 return 1;
5121 /* All paths have been checked. */
5122 return 0;
5125 /* The wrapper for pre_expr_reaches_here_work that ensures that any
5126 memory allocated for that function is returned. */
5128 static int
5129 pre_expr_reaches_here_p (occr_bb, expr, bb)
5130 basic_block occr_bb;
5131 struct expr *expr;
5132 basic_block bb;
5134 int rval;
5135 char *visited = (char *) xcalloc (last_basic_block, 1);
5137 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
5139 free (visited);
5140 return rval;
5144 /* Given an expr, generate RTL which we can insert at the end of a BB,
5145 or on an edge. Set the block number of any insns generated to
5146 the value of BB. */
5148 static rtx
5149 process_insert_insn (expr)
5150 struct expr *expr;
5152 rtx reg = expr->reaching_reg;
5153 rtx exp = copy_rtx (expr->expr);
5154 rtx pat;
5156 start_sequence ();
5158 /* If the expression is something that's an operand, like a constant,
5159 just copy it to a register. */
5160 if (general_operand (exp, GET_MODE (reg)))
5161 emit_move_insn (reg, exp);
5163 /* Otherwise, make a new insn to compute this expression and make sure the
5164 insn will be recognized (this also adds any needed CLOBBERs). Copy the
5165 expression to make sure we don't have any sharing issues. */
5166 else if (insn_invalid_p (emit_insn (gen_rtx_SET (VOIDmode, reg, exp))))
5167 abort ();
5169 pat = get_insns ();
5170 end_sequence ();
5172 return pat;
5175 /* Add EXPR to the end of basic block BB.
5177 This is used by both PRE and code hoisting.
5179 For PRE, we want to verify that the expr is either transparent
5180 or locally anticipatable in the target block. This check makes
5181 no sense for code hoisting. */
5183 static void
5184 insert_insn_end_bb (expr, bb, pre)
5185 struct expr *expr;
5186 basic_block bb;
5187 int pre;
5189 rtx insn = bb->end;
5190 rtx new_insn;
5191 rtx reg = expr->reaching_reg;
5192 int regno = REGNO (reg);
5193 rtx pat, pat_end;
5195 pat = process_insert_insn (expr);
5196 if (pat == NULL_RTX || ! INSN_P (pat))
5197 abort ();
5199 pat_end = pat;
5200 while (NEXT_INSN (pat_end) != NULL_RTX)
5201 pat_end = NEXT_INSN (pat_end);
5203 /* If the last insn is a jump, insert EXPR in front [taking care to
5204 handle cc0, etc. properly]. Similarly we need to take care of trapping
5205 instructions in the presence of non-call exceptions. */
5207 if (GET_CODE (insn) == JUMP_INSN
5208 || (GET_CODE (insn) == INSN
5209 && (bb->succ->succ_next || (bb->succ->flags & EDGE_ABNORMAL))))
5211 #ifdef HAVE_cc0
5212 rtx note;
5213 #endif
5214 /* It should always be the case that we can put these instructions
5215 anywhere in the basic block when performing PRE optimizations.
5216 Check this. */
5217 if (GET_CODE (insn) == INSN && pre
5218 && !TEST_BIT (antloc[bb->index], expr->bitmap_index)
5219 && !TEST_BIT (transp[bb->index], expr->bitmap_index))
5220 abort ();
5222 /* If this is a jump table, then we can't insert stuff here. Since
5223 we know the previous real insn must be the tablejump, we insert
5224 the new instruction just before the tablejump. */
5225 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
5226 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
5227 insn = prev_real_insn (insn);
5229 #ifdef HAVE_cc0
5230 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
5231 if cc0 isn't set. */
5232 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
5233 if (note)
5234 insn = XEXP (note, 0);
5235 else
5237 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
5238 if (maybe_cc0_setter
5239 && INSN_P (maybe_cc0_setter)
5240 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
5241 insn = maybe_cc0_setter;
5243 #endif
5244 /* FIXME: What if something in cc0/jump uses value set in new insn? */
5245 new_insn = emit_insn_before (pat, insn);
5248 /* Likewise if the last insn is a call, as will happen in the presence
5249 of exception handling. */
5250 else if (GET_CODE (insn) == CALL_INSN
5251 && (bb->succ->succ_next || (bb->succ->flags & EDGE_ABNORMAL)))
5253 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
5254 we search backward and place the instructions before the first
5255 parameter is loaded. Do this for everyone for consistency and a
5256 presumption that we'll get better code elsewhere as well.
5258 It should always be the case that we can put these instructions
5259 anywhere in the basic block when performing PRE optimizations.
5260 Check this. */
5262 if (pre
5263 && !TEST_BIT (antloc[bb->index], expr->bitmap_index)
5264 && !TEST_BIT (transp[bb->index], expr->bitmap_index))
5265 abort ();
5267 /* Since different machines initialize their parameter registers
5268 in different orders, assume nothing. Collect the set of all
5269 parameter registers. */
5270 insn = find_first_parameter_load (insn, bb->head);
5272 /* If we found all the parameter loads, then we want to insert
5273 before the first parameter load.
5275 If we did not find all the parameter loads, then we might have
5276 stopped on the head of the block, which could be a CODE_LABEL.
5277 If we inserted before the CODE_LABEL, then we would be putting
5278 the insn in the wrong basic block. In that case, put the insn
5279 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
5280 while (GET_CODE (insn) == CODE_LABEL
5281 || NOTE_INSN_BASIC_BLOCK_P (insn))
5282 insn = NEXT_INSN (insn);
5284 new_insn = emit_insn_before (pat, insn);
5286 else
5287 new_insn = emit_insn_after (pat, insn);
5289 while (1)
5291 if (INSN_P (pat))
5293 add_label_notes (PATTERN (pat), new_insn);
5294 note_stores (PATTERN (pat), record_set_info, pat);
5296 if (pat == pat_end)
5297 break;
5298 pat = NEXT_INSN (pat);
5301 gcse_create_count++;
5303 if (gcse_file)
5305 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
5306 bb->index, INSN_UID (new_insn));
5307 fprintf (gcse_file, "copying expression %d to reg %d\n",
5308 expr->bitmap_index, regno);
5312 /* Insert partially redundant expressions on edges in the CFG to make
5313 the expressions fully redundant. */
5315 static int
5316 pre_edge_insert (edge_list, index_map)
5317 struct edge_list *edge_list;
5318 struct expr **index_map;
5320 int e, i, j, num_edges, set_size, did_insert = 0;
5321 sbitmap *inserted;
5323 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
5324 if it reaches any of the deleted expressions. */
5326 set_size = pre_insert_map[0]->size;
5327 num_edges = NUM_EDGES (edge_list);
5328 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
5329 sbitmap_vector_zero (inserted, num_edges);
5331 for (e = 0; e < num_edges; e++)
5333 int indx;
5334 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
5336 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
5338 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
5340 for (j = indx; insert && j < (int) expr_hash_table.n_elems; j++, insert >>= 1)
5341 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
5343 struct expr *expr = index_map[j];
5344 struct occr *occr;
5346 /* Now look at each deleted occurrence of this expression. */
5347 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
5349 if (! occr->deleted_p)
5350 continue;
5352 /* Insert this expression on this edge if it would
5353 reach the deleted occurrence in BB. */
5354 if (!TEST_BIT (inserted[e], j))
5356 rtx insn;
5357 edge eg = INDEX_EDGE (edge_list, e);
5359 /* We can't insert anything on an abnormal and
5360 critical edge, so we insert the insn at the end of
5361 the previous block. There are several alternatives
5362 detailed in Morgan's book P277 (sec 10.5) for
5363 handling this situation. This one is easiest for
5364 now. */
5366 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
5367 insert_insn_end_bb (index_map[j], bb, 0);
5368 else
5370 insn = process_insert_insn (index_map[j]);
5371 insert_insn_on_edge (insn, eg);
5374 if (gcse_file)
5376 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
5377 bb->index,
5378 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
5379 fprintf (gcse_file, "copy expression %d\n",
5380 expr->bitmap_index);
5383 update_ld_motion_stores (expr);
5384 SET_BIT (inserted[e], j);
5385 did_insert = 1;
5386 gcse_create_count++;
5393 sbitmap_vector_free (inserted);
5394 return did_insert;
5397 /* Copy the result of INSN to REG. INDX is the expression number. */
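/* E.g. (illustrative, register numbers made up): for an available
   computation (set (reg 70) (plus:SI (reg 65) (reg 66))) this emits
   (set (reg 200) (reg 70)) right after INSN, where (reg 200) is the
   expression's reaching_reg created by pre_delete.  */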
5399 static void
5400 pre_insert_copy_insn (expr, insn)
5401 struct expr *expr;
5402 rtx insn;
5404 rtx reg = expr->reaching_reg;
5405 int regno = REGNO (reg);
5406 int indx = expr->bitmap_index;
5407 rtx set = single_set (insn);
5408 rtx new_insn;
5410 if (!set)
5411 abort ();
5413 new_insn = emit_insn_after (gen_move_insn (reg, SET_DEST (set)), insn);
5415 /* Keep register set table up to date. */
5416 record_one_set (regno, new_insn);
5418 gcse_create_count++;
5420 if (gcse_file)
5421 fprintf (gcse_file,
5422 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
5423 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
5424 INSN_UID (insn), regno);
5425 update_ld_motion_stores (expr);
5428 /* Copy available expressions that reach the redundant expression
5429 to `reaching_reg'. */
5431 static void
5432 pre_insert_copies ()
5434 unsigned int i;
5435 struct expr *expr;
5436 struct occr *occr;
5437 struct occr *avail;
5439 /* For each available expression in the table, copy the result to
5440 `reaching_reg' if the expression reaches a deleted one.
5442 ??? The current algorithm is rather brute force.
5443 Need to do some profiling. */
5445 for (i = 0; i < expr_hash_table.size; i++)
5446 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
5448 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
5449 we don't want to insert a copy here because the expression may not
5450 really be redundant. So only insert an insn if the expression was
5451 deleted. This test also avoids further processing if the
5452 expression wasn't deleted anywhere. */
5453 if (expr->reaching_reg == NULL)
5454 continue;
5456 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
5458 if (! occr->deleted_p)
5459 continue;
5461 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
5463 rtx insn = avail->insn;
5465 /* No need to handle this one if handled already. */
5466 if (avail->copied_p)
5467 continue;
5469 /* Don't handle this one if it's a redundant one. */
5470 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
5471 continue;
5473 /* Or if the expression doesn't reach the deleted one. */
5474 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
5475 expr,
5476 BLOCK_FOR_INSN (occr->insn)))
5477 continue;
5479 /* Copy the result of avail to reaching_reg. */
5480 pre_insert_copy_insn (expr, insn);
5481 avail->copied_p = 1;
5487 /* Emit move from SRC to DEST noting the equivalence with expression computed
5488 in INSN. */
5489 static rtx
5490 gcse_emit_move_after (src, dest, insn)
5491 rtx src, dest, insn;
5493 rtx new;
5494 rtx set = single_set (insn), set2;
5495 rtx note;
5496 rtx eqv;
5498 /* This should never fail since we're creating a reg->reg copy
5499 we've verified to be valid. */
5501 new = emit_insn_after (gen_move_insn (dest, src), insn);
5503 /* Note the equivalence for local CSE pass. */
5504 set2 = single_set (new);
5505 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
5506 return new;
5507 if ((note = find_reg_equal_equiv_note (insn)))
5508 eqv = XEXP (note, 0);
5509 else
5510 eqv = SET_SRC (set);
5512 set_unique_reg_note (new, REG_EQUAL, copy_insn_1 (eqv));
5514 return new;
5517 /* Delete redundant computations.
5518 Deletion is done by changing the insn to copy the `reaching_reg' of
5519 the expression into the result of the SET. It is left to later passes
5520 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
5522 Returns nonzero if a change is made. */
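/* E.g. (illustrative, register numbers made up): a redundant
   (set (reg 70) (plus:SI (reg 65) (reg 66)))
   is effectively replaced by (set (reg 70) (reg 200)), where (reg 200)
   is the expression's reaching_reg; the original insn is deleted and
   the copy is emitted in its place by gcse_emit_move_after.  */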
5524 static int
5525 pre_delete ()
5527 unsigned int i;
5528 int changed;
5529 struct expr *expr;
5530 struct occr *occr;
5532 changed = 0;
5533 for (i = 0; i < expr_hash_table.size; i++)
5534 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
5536 int indx = expr->bitmap_index;
5538 /* We only need to search antic_occr since we require
5539 ANTLOC != 0. */
5541 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
5543 rtx insn = occr->insn;
5544 rtx set;
5545 basic_block bb = BLOCK_FOR_INSN (insn);
5547 if (TEST_BIT (pre_delete_map[bb->index], indx))
5549 set = single_set (insn);
5550 if (! set)
5551 abort ();
5553 /* Create a pseudo-reg to store the result of reaching
5554 expressions into. Get the mode for the new pseudo from
5555 the mode of the original destination pseudo. */
5556 if (expr->reaching_reg == NULL)
5557 expr->reaching_reg
5558 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5560 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
5561 delete_insn (insn);
5562 occr->deleted_p = 1;
5563 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
5564 changed = 1;
5565 gcse_subst_count++;
5567 if (gcse_file)
5569 fprintf (gcse_file,
5570 "PRE: redundant insn %d (expression %d) in ",
5571 INSN_UID (insn), indx);
5572 fprintf (gcse_file, "bb %d, reaching reg is %d\n",
5573 bb->index, REGNO (expr->reaching_reg));
5579 return changed;
5582 /* Perform GCSE optimizations using PRE.
5583 This is called by one_pre_gcse_pass after all the dataflow analysis
5584 has been done.
5586 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
5587 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
5588 Compiler Design and Implementation.
5590 ??? A new pseudo reg is created to hold the reaching expression. The nice
5591 thing about the classical approach is that it would try to use an existing
5592 reg. If the register can't be adequately optimized [i.e. we introduce
5593 reload problems], one could add a pass here to propagate the new register
5594 through the block.
5596 ??? We don't handle single sets in PARALLELs because we're [currently] not
5597 able to copy the rest of the parallel when we insert copies to create full
5598 redundancies from partial redundancies. However, there's no reason why we
5599 can't handle PARALLELs in the cases where there are no partial
5600 redundancies. */
5602 static int
5603 pre_gcse ()
5605 unsigned int i;
5606 int did_insert, changed;
5607 struct expr **index_map;
5608 struct expr *expr;
5610 /* Compute a mapping from expression number (`bitmap_index') to
5611 hash table entry. */
5613 index_map = (struct expr **) xcalloc (expr_hash_table.n_elems, sizeof (struct expr *));
5614 for (i = 0; i < expr_hash_table.size; i++)
5615 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
5616 index_map[expr->bitmap_index] = expr;
5618 /* Reset bitmap used to track which insns are redundant. */
5619 pre_redundant_insns = sbitmap_alloc (max_cuid);
5620 sbitmap_zero (pre_redundant_insns);
5622 /* Delete the redundant insns first so that
5623 - we know what register to use for the new insns and for the other
5624 ones with reaching expressions
5625 - we know which insns are redundant when we go to create copies */
5627 changed = pre_delete ();
5629 did_insert = pre_edge_insert (edge_list, index_map);
5631 /* In other places with reaching expressions, copy the expression to the
5632 specially allocated pseudo-reg that reaches the redundant expr. */
5633 pre_insert_copies ();
5634 if (did_insert)
5636 commit_edge_insertions ();
5637 changed = 1;
5640 free (index_map);
5641 sbitmap_free (pre_redundant_insns);
5642 return changed;
5645 /* Top level routine to perform one PRE GCSE pass.
5647 Return nonzero if a change was made. */
5649 static int
5650 one_pre_gcse_pass (pass)
5651 int pass;
5653 int changed = 0;
5655 gcse_subst_count = 0;
5656 gcse_create_count = 0;
5658 alloc_hash_table (max_cuid, &expr_hash_table, 0);
5659 add_noreturn_fake_exit_edges ();
5660 if (flag_gcse_lm)
5661 compute_ld_motion_mems ();
5663 compute_hash_table (&expr_hash_table);
5664 trim_ld_motion_mems ();
5665 if (gcse_file)
5666 dump_hash_table (gcse_file, "Expression", &expr_hash_table);
5668 if (expr_hash_table.n_elems > 0)
5670 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
5671 compute_pre_data ();
5672 changed |= pre_gcse ();
5673 free_edge_list (edge_list);
5674 free_pre_mem ();
5677 free_ldst_mems ();
5678 remove_fake_edges ();
5679 free_hash_table (&expr_hash_table);
5681 if (gcse_file)
5683 fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
5684 current_function_name, pass, bytes_used);
5685 fprintf (gcse_file, "%d substs, %d insns created\n",
5686 gcse_subst_count, gcse_create_count);
5689 return changed;
5692 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
5693 If notes are added to an insn which references a CODE_LABEL, the
5694 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
5695 because the following loop optimization pass requires them. */
5697 /* ??? This is very similar to the loop.c add_label_notes function. We
5698 could probably share code here. */
5700 /* ??? If there was a jump optimization pass after gcse and before loop,
5701 then we would not need to do this here, because jump would add the
5702 necessary REG_LABEL notes. */
5704 static void
5705 add_label_notes (x, insn)
5706 rtx x;
5707 rtx insn;
5709 enum rtx_code code = GET_CODE (x);
5710 int i, j;
5711 const char *fmt;
5713 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
5715 /* This code used to ignore labels that referred to dispatch tables to
5716 avoid flow generating (slightly) worse code.
5718 We no longer ignore such label references (see LABEL_REF handling in
5719 mark_jump_label for additional information). */
5721 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
5722 REG_NOTES (insn));
5723 if (LABEL_P (XEXP (x, 0)))
5724 LABEL_NUSES (XEXP (x, 0))++;
5725 return;
5728 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
5730 if (fmt[i] == 'e')
5731 add_label_notes (XEXP (x, i), insn);
5732 else if (fmt[i] == 'E')
5733 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5734 add_label_notes (XVECEXP (x, i, j), insn);
5738 /* Compute transparent outgoing information for each block.
5740 An expression is transparent to an edge unless it is killed by
5741 the edge itself. This can only happen with abnormal control flow,
5742 when the edge is traversed through a call. This happens with
5743 non-local labels and exceptions.
5745 This would not be necessary if we split the edge. While this is
5746 normally impossible for abnormal critical edges, with some effort
5747 it should be possible with exception handling, since we still have
5748 control over which handler should be invoked. But due to increased
5749 EH table sizes, this may not be worthwhile. */
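/* For example (illustrative), an expression such as (mem:SI (reg 65))
   is treated as killed at the end of any block that ends in a
   CALL_INSN, unless its address is a constant-pool reference, since
   the callee may store through an aliasing pointer.  */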
5751 static void
5752 compute_transpout ()
5754 basic_block bb;
5755 unsigned int i;
5756 struct expr *expr;
5758 sbitmap_vector_ones (transpout, last_basic_block);
5760 FOR_EACH_BB (bb)
5762 /* Note that flow inserted a nop at the end of basic blocks that
5763 end in call instructions for reasons other than abnormal
5764 control flow. */
5765 if (GET_CODE (bb->end) != CALL_INSN)
5766 continue;
5768 for (i = 0; i < expr_hash_table.size; i++)
5769 for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash)
5770 if (GET_CODE (expr->expr) == MEM)
5772 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
5773 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
5774 continue;
5776 /* ??? Optimally, we would use interprocedural alias
5777 analysis to determine if this mem is actually killed
5778 by this call. */
5779 RESET_BIT (transpout[bb->index], expr->bitmap_index);
5784 /* Removal of useless null pointer checks */
5786 /* Called via note_stores. X is set by SETTER. If X is a register we must
5787 invalidate nonnull_local and set nonnull_killed. DATA is really a
5788 `null_pointer_info *'.
5790 We ignore hard registers. */
5792 static void
5793 invalidate_nonnull_info (x, setter, data)
5794 rtx x;
5795 rtx setter ATTRIBUTE_UNUSED;
5796 void *data;
5798 unsigned int regno;
5799 struct null_pointer_info *npi = (struct null_pointer_info *) data;
5801 while (GET_CODE (x) == SUBREG)
5802 x = SUBREG_REG (x);
5804 /* Ignore anything that is not a register or is a hard register. */
5805 if (GET_CODE (x) != REG
5806 || REGNO (x) < npi->min_reg
5807 || REGNO (x) >= npi->max_reg)
5808 return;
5810 regno = REGNO (x) - npi->min_reg;
5812 RESET_BIT (npi->nonnull_local[npi->current_block->index], regno);
5813 SET_BIT (npi->nonnull_killed[npi->current_block->index], regno);
5816 /* Do null-pointer check elimination for the registers indicated in
5817 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5818 they are not our responsibility to free. */
5820 static int
5821 delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5822 nonnull_avout, npi)
5823 unsigned int *block_reg;
5824 sbitmap *nonnull_avin;
5825 sbitmap *nonnull_avout;
5826 struct null_pointer_info *npi;
5828 basic_block bb, current_block;
5829 sbitmap *nonnull_local = npi->nonnull_local;
5830 sbitmap *nonnull_killed = npi->nonnull_killed;
5831 int something_changed = 0;
5833 /* Compute local properties, nonnull and killed. A register will have
5834 the nonnull property if at the end of the current block its value is
5835 known to be nonnull. The killed property indicates that somewhere in
5836 the block any information we had about the register is killed.
5838 Note that a register can have both properties in a single block. That
5839 indicates that it's killed, then later in the block a new value is
5840 computed. */
5841 sbitmap_vector_zero (nonnull_local, last_basic_block);
5842 sbitmap_vector_zero (nonnull_killed, last_basic_block);
5844 FOR_EACH_BB (current_block)
5846 rtx insn, stop_insn;
5848 /* Set the current block for invalidate_nonnull_info. */
5849 npi->current_block = current_block;
5851 /* Scan each insn in the basic block looking for memory references and
5852 register sets. */
5853 stop_insn = NEXT_INSN (current_block->end);
5854 for (insn = current_block->head;
5855 insn != stop_insn;
5856 insn = NEXT_INSN (insn))
5858 rtx set;
5859 rtx reg;
5861 /* Ignore anything that is not a normal insn. */
5862 if (! INSN_P (insn))
5863 continue;
5865 /* Basically ignore anything that is not a simple SET. We do have
5866 to make sure to invalidate nonnull_local and set nonnull_killed
5867 for such insns though. */
5868 set = single_set (insn);
5869 if (!set)
5871 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5872 continue;
5875 /* See if we've got a usable memory load. We handle it first
5876 in case it uses its address register as a dest (which kills
5877 the nonnull property). */
5878 if (GET_CODE (SET_SRC (set)) == MEM
5879 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5880 && REGNO (reg) >= npi->min_reg
5881 && REGNO (reg) < npi->max_reg)
5882 SET_BIT (nonnull_local[current_block->index],
5883 REGNO (reg) - npi->min_reg);
5885 /* Now invalidate stuff clobbered by this insn. */
5886 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5888 /* And handle stores, we do these last since any sets in INSN can
5889 not kill the nonnull property if it is derived from a MEM
5890 appearing in a SET_DEST. */
5891 if (GET_CODE (SET_DEST (set)) == MEM
5892 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5893 && REGNO (reg) >= npi->min_reg
5894 && REGNO (reg) < npi->max_reg)
5895 SET_BIT (nonnull_local[current_block->index],
5896 REGNO (reg) - npi->min_reg);
5900 /* Now compute global properties based on the local properties. This
5901 is a classic global availability algorithm. */
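/* That is, roughly:

	nonnull_avout[b] = nonnull_local[b] | (nonnull_avin[b] & ~nonnull_killed[b])
	nonnull_avin[b]  = intersection of nonnull_avout[p] over the
			   predecessors p of b (empty for the entry block)  */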
5902 compute_available (nonnull_local, nonnull_killed,
5903 nonnull_avout, nonnull_avin);
5905 /* Now look at each bb and see if it ends with a compare of a value
5906 against zero. */
5907 FOR_EACH_BB (bb)
5909 rtx last_insn = bb->end;
5910 rtx condition, earliest;
5911 int compare_and_branch;
5913 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5914 since BLOCK_REG[BB] is zero if this block did not end with a
5915 comparison against zero, this condition works. */
5916 if (block_reg[bb->index] < npi->min_reg
5917 || block_reg[bb->index] >= npi->max_reg)
5918 continue;
5920 /* LAST_INSN is a conditional jump. Get its condition. */
5921 condition = get_condition (last_insn, &earliest);
5923 /* If we can't determine the condition then skip. */
5924 if (! condition)
5925 continue;
5927 /* Is the register known to have a nonzero value? */
5928 if (!TEST_BIT (nonnull_avout[bb->index], block_reg[bb->index] - npi->min_reg))
5929 continue;
5931 /* Try to compute whether the compare/branch at the end of the block is one or
5932 two instructions. */
5933 if (earliest == last_insn)
5934 compare_and_branch = 1;
5935 else if (earliest == prev_nonnote_insn (last_insn))
5936 compare_and_branch = 2;
5937 else
5938 continue;
5940 /* We know the register in this comparison is nonnull at exit from
5941 this block. We can optimize this comparison. */
5942 if (GET_CODE (condition) == NE)
5944 rtx new_jump;
5946 new_jump = emit_jump_insn_after (gen_jump (JUMP_LABEL (last_insn)),
5947 last_insn);
5948 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5949 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5950 emit_barrier_after (new_jump);
5953 something_changed = 1;
5954 delete_insn (last_insn);
5955 if (compare_and_branch == 2)
5956 delete_insn (earliest);
5957 purge_dead_edges (bb);
5959 /* Don't check this block again. (Note that BLOCK_END is
5960 invalid here; we deleted the last instruction in the
5961 block.) */
5962 block_reg[bb->index] = 0;
5965 return something_changed;
5968 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5969 at compile time.
5971 This is conceptually similar to global constant/copy propagation and
5972 classic global CSE (it even uses the same dataflow equations as cprop).
5974 If a register is used as a memory address with the form (mem (reg)), then we
5975 know that REG can not be zero at that point in the program. Any instruction
5976 which sets REG "kills" this property.
5978 So, if every path leading to a conditional branch has an available memory
5979 reference of that form, then we know the register can not have the value
5980 zero at the conditional branch.
5982 So we merely need to compute the local properties and propagate that data
5983 around the cfg, then optimize where possible.
5985 We run this pass two times. Once before CSE, then again after CSE. This
5986 has proven to be the most profitable approach. It is rare for new
5987 optimization opportunities of this nature to appear after the first CSE
5988 pass.
5990 This could probably be integrated with global cprop with a little work. */
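/* A minimal sketch of the transformation (register numbers are made up):

	(set (reg:SI 100) (mem:SI (reg:SI 200)))	; reg 200 used as an address
	...						; no set of reg 200 in between
	(set (pc) (if_then_else (eq (reg:SI 200) (const_int 0)) ...))

   The load makes reg 200 known to be nonnull, the property reaches the
   comparison, so an EQ branch like the one above can never be taken and is
   deleted; an NE branch is instead replaced by an unconditional jump.  */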
5993 delete_null_pointer_checks (f)
5994 rtx f ATTRIBUTE_UNUSED;
5996 sbitmap *nonnull_avin, *nonnull_avout;
5997 unsigned int *block_reg;
5998 basic_block bb;
5999 int reg;
6000 int regs_per_pass;
6001 int max_reg;
6002 struct null_pointer_info npi;
6003 int something_changed = 0;
6005 /* If we have only a single block, then there's nothing to do. */
6006 if (n_basic_blocks <= 1)
6007 return 0;
6009 /* Trying to perform global optimizations on flow graphs which have
6010 a high connectivity will take a long time and is unlikely to be
6011 particularly useful.
6013 In normal circumstances a cfg should have about twice as many edges
6014 as blocks. But we do not want to punish small functions which have
6015 a couple switch statements. So we require a relatively large number
6016 of basic blocks and the ratio of edges to blocks to be high. */
6017 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
6018 return 0;
6020 /* We need four bitmaps, each with a bit for each register in each
6021 basic block. */
6022 max_reg = max_reg_num ();
6023 regs_per_pass = get_bitmap_width (4, last_basic_block, max_reg);
6025 /* Allocate bitmaps to hold local and global properties. */
6026 npi.nonnull_local = sbitmap_vector_alloc (last_basic_block, regs_per_pass);
6027 npi.nonnull_killed = sbitmap_vector_alloc (last_basic_block, regs_per_pass);
6028 nonnull_avin = sbitmap_vector_alloc (last_basic_block, regs_per_pass);
6029 nonnull_avout = sbitmap_vector_alloc (last_basic_block, regs_per_pass);
6031 /* Go through the basic blocks, seeing whether or not each block
6032 ends with a conditional branch whose condition is a comparison
6033 against zero. Record the register compared in BLOCK_REG. */
6034 block_reg = (unsigned int *) xcalloc (last_basic_block, sizeof (int));
6035 FOR_EACH_BB (bb)
6037 rtx last_insn = bb->end;
6038 rtx condition, earliest, reg;
6040 /* We only want conditional branches. */
6041 if (GET_CODE (last_insn) != JUMP_INSN
6042 || !any_condjump_p (last_insn)
6043 || !onlyjump_p (last_insn))
6044 continue;
6046 /* LAST_INSN is a conditional jump. Get its condition. */
6047 condition = get_condition (last_insn, &earliest);
6049 /* If we were unable to get the condition, or it is not an equality
6050 comparison against zero then there's nothing we can do. */
6051 if (!condition
6052 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
6053 || GET_CODE (XEXP (condition, 1)) != CONST_INT
6054 || (XEXP (condition, 1)
6055 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
6056 continue;
6058 /* We must be checking a register against zero. */
6059 reg = XEXP (condition, 0);
6060 if (GET_CODE (reg) != REG)
6061 continue;
6063 block_reg[bb->index] = REGNO (reg);
6066 /* Go through the algorithm for each block of registers. */
6067 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
6069 npi.min_reg = reg;
6070 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
6071 something_changed |= delete_null_pointer_checks_1 (block_reg,
6072 nonnull_avin,
6073 nonnull_avout,
6074 &npi);
6077 /* Free the table of registers compared at the end of every block. */
6078 free (block_reg);
6080 /* Free bitmaps. */
6081 sbitmap_vector_free (npi.nonnull_local);
6082 sbitmap_vector_free (npi.nonnull_killed);
6083 sbitmap_vector_free (nonnull_avin);
6084 sbitmap_vector_free (nonnull_avout);
6086 return something_changed;
6089 /* Code Hoisting variables and subroutines. */
6091 /* Very busy expressions. */
6092 static sbitmap *hoist_vbein;
6093 static sbitmap *hoist_vbeout;
6095 /* Hoistable expressions. */
6096 static sbitmap *hoist_exprs;
6098 /* Dominator bitmaps. */
6099 dominance_info dominators;
6101 /* ??? We could compute post dominators and run this algorithm in
6102 reverse to perform tail merging; doing so would probably be
6103 more effective than the tail merging code in jump.c.
6105 It's unclear if tail merging could be run in parallel with
6106 code hoisting. It would be nice. */
6108 /* Allocate vars used for code hoisting analysis. */
6110 static void
6111 alloc_code_hoist_mem (n_blocks, n_exprs)
6112 int n_blocks, n_exprs;
6114 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
6115 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
6116 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
6118 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
6119 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
6120 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
6121 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
6124 /* Free vars used for code hoisting analysis. */
6126 static void
6127 free_code_hoist_mem ()
6129 sbitmap_vector_free (antloc);
6130 sbitmap_vector_free (transp);
6131 sbitmap_vector_free (comp);
6133 sbitmap_vector_free (hoist_vbein);
6134 sbitmap_vector_free (hoist_vbeout);
6135 sbitmap_vector_free (hoist_exprs);
6136 sbitmap_vector_free (transpout);
6138 free_dominance_info (dominators);
6141 /* Compute the very busy expressions at entry/exit from each block.
6143 An expression is very busy if all paths from a given point
6144 compute the expression. */
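/* In terms of the bitmaps used below, the equations iterated to a fixed
   point are roughly:

	hoist_vbein[bb]  = antloc[bb] | (hoist_vbeout[bb] & transp[bb])
	hoist_vbeout[bb] = intersection of hoist_vbein[s] over the
			   successors s of bb

   hoist_vbeout is left empty for the block laid out just before the exit
   block.  */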
6146 static void
6147 compute_code_hoist_vbeinout ()
6149 int changed, passes;
6150 basic_block bb;
6152 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
6153 sbitmap_vector_zero (hoist_vbein, last_basic_block);
6155 passes = 0;
6156 changed = 1;
6158 while (changed)
6160 changed = 0;
6162 /* We scan the blocks in the reverse order to speed up
6163 the convergence. */
6164 FOR_EACH_BB_REVERSE (bb)
6166 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], antloc[bb->index],
6167 hoist_vbeout[bb->index], transp[bb->index]);
6168 if (bb->next_bb != EXIT_BLOCK_PTR)
6169 sbitmap_intersection_of_succs (hoist_vbeout[bb->index], hoist_vbein, bb->index);
6172 passes++;
6175 if (gcse_file)
6176 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
6179 /* Top level routine to do the dataflow analysis needed by code hoisting. */
6181 static void
6182 compute_code_hoist_data ()
6184 compute_local_properties (transp, comp, antloc, &expr_hash_table);
6185 compute_transpout ();
6186 compute_code_hoist_vbeinout ();
6187 dominators = calculate_dominance_info (CDI_DOMINATORS);
6188 if (gcse_file)
6189 fprintf (gcse_file, "\n");
6192 /* Determine if the expression identified by EXPR_INDEX would
6193 reach BB unimpaired if it was placed at the end of EXPR_BB.
6195 It's unclear exactly what Muchnick meant by "unimpaired". It seems
6196 to me that the expression must either be computed or transparent in
6197 *every* block in the path(s) from EXPR_BB to BB. Any other definition
6198 would allow the expression to be hoisted out of loops, even if
6199 the expression wasn't a loop invariant.
6201 Contrast this to reachability for PRE where an expression is
6202 considered reachable if *any* path reaches instead of *all*
6203 paths. */
6205 static int
6206 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
6207 basic_block expr_bb;
6208 int expr_index;
6209 basic_block bb;
6210 char *visited;
6212 edge pred;
6213 int visited_allocated_locally = 0;
6216 if (visited == NULL)
6218 visited_allocated_locally = 1;
6219 visited = xcalloc (last_basic_block, 1);
6222 for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
6224 basic_block pred_bb = pred->src;
6226 if (pred->src == ENTRY_BLOCK_PTR)
6227 break;
6228 else if (pred_bb == expr_bb)
6229 continue;
6230 else if (visited[pred_bb->index])
6231 continue;
6233 /* Does this predecessor generate this expression? */
6234 else if (TEST_BIT (comp[pred_bb->index], expr_index))
6235 break;
6236 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
6237 break;
6239 /* Not killed. */
6240 else
6242 visited[pred_bb->index] = 1;
6243 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
6244 pred_bb, visited))
6245 break;
6248 if (visited_allocated_locally)
6249 free (visited);
6251 return (pred == NULL);
6254 /* Actually perform code hoisting. */
6256 static void
6257 hoist_code ()
6259 basic_block bb, dominated;
6260 basic_block *domby;
6261 unsigned int domby_len;
6262 unsigned int i,j;
6263 struct expr **index_map;
6264 struct expr *expr;
6266 sbitmap_vector_zero (hoist_exprs, last_basic_block);
6268 /* Compute a mapping from expression number (`bitmap_index') to
6269 hash table entry. */
6271 index_map = (struct expr **) xcalloc (expr_hash_table.n_elems, sizeof (struct expr *));
6272 for (i = 0; i < expr_hash_table.size; i++)
6273 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
6274 index_map[expr->bitmap_index] = expr;
6276 /* Walk over each basic block looking for potentially hoistable
6277 expressions; nothing gets hoisted from the entry block.
6278 FOR_EACH_BB (bb)
6280 int found = 0;
6281 int insn_inserted_p;
6283 domby_len = get_dominated_by (dominators, bb, &domby);
6284 /* Examine each expression that is very busy at the exit of this
6285 block. These are the potentially hoistable expressions. */
6286 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
6288 int hoistable = 0;
6290 if (TEST_BIT (hoist_vbeout[bb->index], i)
6291 && TEST_BIT (transpout[bb->index], i))
6293 /* We've found a potentially hoistable expression, now
6294 we look at every block BB dominates to see if it
6295 computes the expression. */
6296 for (j = 0; j < domby_len; j++)
6298 dominated = domby[j];
6299 /* Ignore self dominance. */
6300 if (bb == dominated)
6301 continue;
6302 /* We've found a dominated block, now see if it computes
6303 the busy expression and whether or not moving that
6304 expression to the "beginning" of that block is safe. */
6305 if (!TEST_BIT (antloc[dominated->index], i))
6306 continue;
6308 /* Note if the expression would reach the dominated block
6309 unimpaired if it was placed at the end of BB.
6311 Keep track of how many times this expression is hoistable
6312 from a dominated block into BB. */
6313 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
6314 hoistable++;
6317 /* If we found more than one hoistable occurrence of this
6318 expression, then note it in the bitmap of expressions to
6319 hoist. It makes no sense to hoist things which are computed
6320 in only one BB, and doing so tends to pessimize register
6321 allocation. One could increase this value to try harder
6322 to avoid any possible code expansion due to register
6323 allocation issues; however experiments have shown that
6324 the vast majority of hoistable expressions are only movable
6325 from two successors, so raising this threshold is likely
6326 to nullify any benefit we get from code hoisting. */
6327 if (hoistable > 1)
6329 SET_BIT (hoist_exprs[bb->index], i);
6330 found = 1;
6334 /* If we found nothing to hoist, then quit now. */
6335 if (! found)
6337 free (domby);
6338 continue;
6341 /* Loop over all the hoistable expressions. */
6342 for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
6344 /* We want to insert the expression into BB only once, so
6345 note when we've inserted it. */
6346 insn_inserted_p = 0;
6348 /* These tests should be the same as the tests above. */
6349 if (TEST_BIT (hoist_vbeout[bb->index], i))
6351 /* We've found a potentially hoistable expression, now
6352 we look at every block BB dominates to see if it
6353 computes the expression. */
6354 for (j = 0; j < domby_len; j++)
6356 dominated = domby[j];
6357 /* Ignore self dominance. */
6358 if (bb == dominated)
6359 continue;
6361 /* We've found a dominated block, now see if it computes
6362 the busy expression and whether or not moving that
6363 expression to the "beginning" of that block is safe. */
6364 if (!TEST_BIT (antloc[dominated->index], i))
6365 continue;
6367 /* The expression is computed in the dominated block and
6368 it would be safe to compute it at the start of the
6369 dominated block. Now we have to determine if the
6370 expression would reach the dominated block if it was
6371 placed at the end of BB. */
6372 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
6374 struct expr *expr = index_map[i];
6375 struct occr *occr = expr->antic_occr;
6376 rtx insn;
6377 rtx set;
6379 /* Find the right occurrence of this expression. */
6380 while (occr && BLOCK_FOR_INSN (occr->insn) != dominated)
6381 occr = occr->next;
6383 /* Should never happen. */
6384 if (!occr)
6385 abort ();
6387 insn = occr->insn;
6389 set = single_set (insn);
6390 if (! set)
6391 abort ();
6393 /* Create a pseudo-reg to store the result of reaching
6394 expressions into. Get the mode for the new pseudo
6395 from the mode of the original destination pseudo. */
6396 if (expr->reaching_reg == NULL)
6397 expr->reaching_reg
6398 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
6400 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
6401 delete_insn (insn);
6402 occr->deleted_p = 1;
6403 if (!insn_inserted_p)
6405 insert_insn_end_bb (index_map[i], bb, 0);
6406 insn_inserted_p = 1;
6412 free (domby);
6415 free (index_map);
6418 /* Top level routine to perform one code hoisting (aka unification) pass.
6420 Return nonzero if a change was made. */
6422 static int
6423 one_code_hoisting_pass ()
6425 int changed = 0;
6427 alloc_hash_table (max_cuid, &expr_hash_table, 0);
6428 compute_hash_table (&expr_hash_table);
6429 if (gcse_file)
6430 dump_hash_table (gcse_file, "Code Hoisting Expressions", &expr_hash_table);
6432 if (expr_hash_table.n_elems > 0)
6434 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
6435 compute_code_hoist_data ();
6436 hoist_code ();
6437 free_code_hoist_mem ();
6440 free_hash_table (&expr_hash_table);
6442 return changed;
6445 /* Here we provide the things required to do store motion towards
6446 the exit. In order for this to be effective, gcse also needed to
6447 be taught how to move a load when it is killed only by a store to itself.
6449 int i;
6450 float a[10];
6452 void foo(float scale)
6453 {
6454 for (i=0; i<10; i++)
6455 a[i] *= scale;
6456 }
6458 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
6459 the load out since it's live around the loop, and stored at the bottom
6460 of the loop.
6462 The 'Load Motion' referred to and implemented in this file is
6463 an enhancement to gcse which when using edge based lcm, recognizes
6464 this situation and allows gcse to move the load out of the loop.
6466 Once gcse has hoisted the load, store motion can then push this
6467 load towards the exit, and we end up with no loads or stores of 'i'
6468 in the loop. */
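/* Roughly, the loop above then ends up as (a sketch only; the actual
   transformation is done on RTL, and "reg" stands for whatever reaching
   register gcse picked):

	for (reg = 0; reg < 10; reg++)
	  a[reg] *= scale;
	i = reg;

   so the only remaining store of 'i' is the single one after the loop.  */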
6470 /* This will search the ldst list for a matching expression. If it
6471 doesn't find one, we create one and initialize it. */
6473 static struct ls_expr *
6474 ldst_entry (x)
6475 rtx x;
6477 struct ls_expr * ptr;
6479 for (ptr = first_ls_expr(); ptr != NULL; ptr = next_ls_expr (ptr))
6480 if (expr_equiv_p (ptr->pattern, x))
6481 break;
6483 if (!ptr)
6485 ptr = (struct ls_expr *) xmalloc (sizeof (struct ls_expr));
6487 ptr->next = pre_ldst_mems;
6488 ptr->expr = NULL;
6489 ptr->pattern = x;
6490 ptr->loads = NULL_RTX;
6491 ptr->stores = NULL_RTX;
6492 ptr->reaching_reg = NULL_RTX;
6493 ptr->invalid = 0;
6494 ptr->index = 0;
6495 ptr->hash_index = 0;
6496 pre_ldst_mems = ptr;
6499 return ptr;
6502 /* Free up an individual ldst entry. */
6504 static void
6505 free_ldst_entry (ptr)
6506 struct ls_expr * ptr;
6508 free_INSN_LIST_list (& ptr->loads);
6509 free_INSN_LIST_list (& ptr->stores);
6511 free (ptr);
6514 /* Free up all memory associated with the ldst list. */
6516 static void
6517 free_ldst_mems ()
6519 while (pre_ldst_mems)
6521 struct ls_expr * tmp = pre_ldst_mems;
6523 pre_ldst_mems = pre_ldst_mems->next;
6525 free_ldst_entry (tmp);
6528 pre_ldst_mems = NULL;
6531 /* Dump debugging info about the ldst list. */
6533 static void
6534 print_ldst_list (file)
6535 FILE * file;
6537 struct ls_expr * ptr;
6539 fprintf (file, "LDST list: \n");
6541 for (ptr = first_ls_expr(); ptr != NULL; ptr = next_ls_expr (ptr))
6543 fprintf (file, " Pattern (%3d): ", ptr->index);
6545 print_rtl (file, ptr->pattern);
6547 fprintf (file, "\n Loads : ");
6549 if (ptr->loads)
6550 print_rtl (file, ptr->loads);
6551 else
6552 fprintf (file, "(nil)");
6554 fprintf (file, "\n Stores : ");
6556 if (ptr->stores)
6557 print_rtl (file, ptr->stores);
6558 else
6559 fprintf (file, "(nil)");
6561 fprintf (file, "\n\n");
6564 fprintf (file, "\n");
6567 /* Returns 1 if X is in the list of ldst only expressions. */
6569 static struct ls_expr *
6570 find_rtx_in_ldst (x)
6571 rtx x;
6573 struct ls_expr * ptr;
6575 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
6576 if (expr_equiv_p (ptr->pattern, x) && ! ptr->invalid)
6577 return ptr;
6579 return NULL;
6582 /* Assign each element of the list of mems a monotonically increasing value. */
6584 static int
6585 enumerate_ldsts ()
6587 struct ls_expr * ptr;
6588 int n = 0;
6590 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
6591 ptr->index = n++;
6593 return n;
6596 /* Return first item in the list. */
6598 static inline struct ls_expr *
6599 first_ls_expr ()
6601 return pre_ldst_mems;
6604 /* Return the next item in the list after the specified one. */
6606 static inline struct ls_expr *
6607 next_ls_expr (ptr)
6608 struct ls_expr * ptr;
6610 return ptr->next;
6613 /* Load Motion for loads which only kill themselves. */
6615 /* Return true if x is a simple MEM operation, with no registers or
6616 side effects. These are the types of loads we consider for the
6617 ld_motion list, otherwise we let the usual aliasing take care of it. */
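/* For example (a sketch; the mode and register number are made up),

	(mem:SI (symbol_ref:SI ("i")))

   qualifies because a SYMBOL_REF address never varies, while

	(mem:SI (reg:SI 100))

   is rejected because rtx_varies_p is true for a pseudo register
   address.  */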
6619 static int
6620 simple_mem (x)
6621 rtx x;
6623 if (GET_CODE (x) != MEM)
6624 return 0;
6626 if (MEM_VOLATILE_P (x))
6627 return 0;
6629 if (GET_MODE (x) == BLKmode)
6630 return 0;
6632 if (!rtx_varies_p (XEXP (x, 0), 0))
6633 return 1;
6635 return 0;
6638 /* Make sure there isn't a buried reference in this pattern anywhere.
6639 If there is, invalidate the entry for it since we're not capable
6640 of fixing it up just yet. We have to be sure we know about ALL
6641 loads since the aliasing code will allow all entries in the
6642 ld_motion list to not alias themselves. If we miss a load, we will get
6643 the wrong value since gcse might common it and we won't know to
6644 fix it up. */
6646 static void
6647 invalidate_any_buried_refs (x)
6648 rtx x;
6650 const char * fmt;
6651 int i, j;
6652 struct ls_expr * ptr;
6654 /* Invalidate it in the list. */
6655 if (GET_CODE (x) == MEM && simple_mem (x))
6657 ptr = ldst_entry (x);
6658 ptr->invalid = 1;
6661 /* Recursively process the insn. */
6662 fmt = GET_RTX_FORMAT (GET_CODE (x));
6664 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6666 if (fmt[i] == 'e')
6667 invalidate_any_buried_refs (XEXP (x, i));
6668 else if (fmt[i] == 'E')
6669 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6670 invalidate_any_buried_refs (XVECEXP (x, i, j));
6674 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
6675 here means MEM loads and stores to symbols, with no
6676 side effects and no registers in the expression. If there are any
6677 uses/defs which don't match these criteria, the entry is invalidated and
6678 trimmed out later. */
6680 static void
6681 compute_ld_motion_mems ()
6683 struct ls_expr * ptr;
6684 basic_block bb;
6685 rtx insn;
6687 pre_ldst_mems = NULL;
6689 FOR_EACH_BB (bb)
6691 for (insn = bb->head;
6692 insn && insn != NEXT_INSN (bb->end);
6693 insn = NEXT_INSN (insn))
6695 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
6697 if (GET_CODE (PATTERN (insn)) == SET)
6699 rtx src = SET_SRC (PATTERN (insn));
6700 rtx dest = SET_DEST (PATTERN (insn));
6702 /* Check for a simple LOAD... */
6703 if (GET_CODE (src) == MEM && simple_mem (src))
6705 ptr = ldst_entry (src);
6706 if (GET_CODE (dest) == REG)
6707 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
6708 else
6709 ptr->invalid = 1;
6711 else
6713 /* Make sure there isn't a buried load somewhere. */
6714 invalidate_any_buried_refs (src);
6717 /* Check for stores. Don't worry about aliased ones, they
6718 will block any movement we might do later. We only care
6719 about this exact pattern since those are the only
6720 circumstances in which we will ignore the aliasing info. */
6721 if (GET_CODE (dest) == MEM && simple_mem (dest))
6723 ptr = ldst_entry (dest);
6725 if (GET_CODE (src) != MEM
6726 && GET_CODE (src) != ASM_OPERANDS)
6727 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
6728 else
6729 ptr->invalid = 1;
6732 else
6733 invalidate_any_buried_refs (PATTERN (insn));
6739 /* Remove any references that have been either invalidated or are not in the
6740 expression list for pre gcse. */
6742 static void
6743 trim_ld_motion_mems ()
6745 struct ls_expr * last = NULL;
6746 struct ls_expr * ptr = first_ls_expr ();
6748 while (ptr != NULL)
6750 int del = ptr->invalid;
6751 struct expr * expr = NULL;
6753 /* Delete if entry has been made invalid. */
6754 if (!del)
6756 unsigned int i;
6758 del = 1;
6759 /* Delete if we cannot find this mem in the expression list. */
6760 for (i = 0; i < expr_hash_table.size && del; i++)
6762 for (expr = expr_hash_table.table[i];
6763 expr != NULL;
6764 expr = expr->next_same_hash)
6765 if (expr_equiv_p (expr->expr, ptr->pattern))
6767 del = 0;
6768 break;
6773 if (del)
6775 if (last != NULL)
6777 last->next = ptr->next;
6778 free_ldst_entry (ptr);
6779 ptr = last->next;
6781 else
6783 pre_ldst_mems = pre_ldst_mems->next;
6784 free_ldst_entry (ptr);
6785 ptr = pre_ldst_mems;
6788 else
6790 /* Set the expression field if we are keeping it. */
6791 last = ptr;
6792 ptr->expr = expr;
6793 ptr = ptr->next;
6797 /* Show the world what we've found. */
6798 if (gcse_file && pre_ldst_mems != NULL)
6799 print_ldst_list (gcse_file);
6802 /* This routine will take an expression which we are replacing with
6803 a reaching register, and update any stores that are needed if
6804 that expression is in the ld_motion list. Stores are updated by
6805 copying their SRC to the reaching register, and then storing
6806 the reaching register into the store location. This keeps the
6807 correct value in the reaching register for the loads. */
6809 static void
6810 update_ld_motion_stores (expr)
6811 struct expr * expr;
6813 struct ls_expr * mem_ptr;
6815 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
6817 /* We can try to find just the REACHED stores, but it shouldn't
6818 matter to set the reaching reg everywhere... some might be
6819 dead and should be eliminated later. */
6821 /* We replace SET mem = expr with
6822 SET reg = expr
6823 SET mem = reg , where reg is the
6824 reaching reg used in the load. */
6825 rtx list = mem_ptr->stores;
6827 for ( ; list != NULL_RTX; list = XEXP (list, 1))
6829 rtx insn = XEXP (list, 0);
6830 rtx pat = PATTERN (insn);
6831 rtx src = SET_SRC (pat);
6832 rtx reg = expr->reaching_reg;
6833 rtx copy, new;
6835 /* If we've already copied it, continue. */
6836 if (expr->reaching_reg == src)
6837 continue;
6839 if (gcse_file)
6841 fprintf (gcse_file, "PRE: store updated with reaching reg ");
6842 print_rtl (gcse_file, expr->reaching_reg);
6843 fprintf (gcse_file, ":\n ");
6844 print_inline_rtx (gcse_file, insn, 8);
6845 fprintf (gcse_file, "\n");
6848 copy = gen_move_insn ( reg, SET_SRC (pat));
6849 new = emit_insn_before (copy, insn);
6850 record_one_set (REGNO (reg), new);
6851 SET_SRC (pat) = reg;
6853 /* un-recognize this pattern since it's probably different now. */
6854 INSN_CODE (insn) = -1;
6855 gcse_create_count++;
6860 /* Store motion code. */
6862 /* This is used to communicate the target bitvector we want to use in the
6863 reg_set_info routine when called via the note_stores mechanism. */
6864 static sbitmap * regvec;
6866 /* Used in computing the reverse edge graph bit vectors. */
6867 static sbitmap * st_antloc;
6869 /* Global holding the number of store expressions we are dealing with. */
6870 static int num_stores;
6872 /* Check whether we need to mark a register as set. Called from note_stores. */
6874 static void
6875 reg_set_info (dest, setter, data)
6876 rtx dest, setter ATTRIBUTE_UNUSED;
6877 void * data ATTRIBUTE_UNUSED;
6879 if (GET_CODE (dest) == SUBREG)
6880 dest = SUBREG_REG (dest);
6882 if (GET_CODE (dest) == REG)
6883 SET_BIT (*regvec, REGNO (dest));
6886 /* Return nonzero if the register operands of expression X are killed
6887 anywhere in basic block BB. */
6889 static int
6890 store_ops_ok (x, bb)
6891 rtx x;
6892 basic_block bb;
6894 int i;
6895 enum rtx_code code;
6896 const char * fmt;
6898 /* Repeat is used to turn tail-recursion into iteration. */
6899 repeat:
6901 if (x == 0)
6902 return 1;
6904 code = GET_CODE (x);
6905 switch (code)
6907 case REG:
6908 /* If a reg has changed after us in this
6909 block, the operand has been killed. */
6910 return TEST_BIT (reg_set_in_block[bb->index], REGNO (x));
6912 case MEM:
6913 x = XEXP (x, 0);
6914 goto repeat;
6916 case PRE_DEC:
6917 case PRE_INC:
6918 case POST_DEC:
6919 case POST_INC:
6920 return 0;
6922 case PC:
6923 case CC0: /*FIXME*/
6924 case CONST:
6925 case CONST_INT:
6926 case CONST_DOUBLE:
6927 case CONST_VECTOR:
6928 case SYMBOL_REF:
6929 case LABEL_REF:
6930 case ADDR_VEC:
6931 case ADDR_DIFF_VEC:
6932 return 1;
6934 default:
6935 break;
6938 i = GET_RTX_LENGTH (code) - 1;
6939 fmt = GET_RTX_FORMAT (code);
6941 for (; i >= 0; i--)
6943 if (fmt[i] == 'e')
6945 rtx tem = XEXP (x, i);
6947 /* If we are about to do the last recursive call
6948 needed at this level, change it into iteration.
6949 This function is called enough to be worth it. */
6950 if (i == 0)
6952 x = tem;
6953 goto repeat;
6956 if (! store_ops_ok (tem, bb))
6957 return 0;
6959 else if (fmt[i] == 'E')
6961 int j;
6963 for (j = 0; j < XVECLEN (x, i); j++)
6965 if (! store_ops_ok (XVECEXP (x, i, j), bb))
6966 return 0;
6971 return 1;
6974 /* Determine whether INSN is a MEM store pattern that we will consider moving. */
6976 static void
6977 find_moveable_store (insn)
6978 rtx insn;
6980 struct ls_expr * ptr;
6981 rtx dest = PATTERN (insn);
6983 if (GET_CODE (dest) != SET
6984 || GET_CODE (SET_SRC (dest)) == ASM_OPERANDS)
6985 return;
6987 dest = SET_DEST (dest);
6989 if (GET_CODE (dest) != MEM || MEM_VOLATILE_P (dest)
6990 || GET_MODE (dest) == BLKmode)
6991 return;
6993 if (GET_CODE (XEXP (dest, 0)) != SYMBOL_REF)
6994 return;
6996 if (rtx_varies_p (XEXP (dest, 0), 0))
6997 return;
6999 ptr = ldst_entry (dest);
7000 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
7003 /* Build the store table: collect the MEM stores we will consider moving,
7004 and record which registers are set in each basic block. */
7006 static int
7007 compute_store_table ()
7009 int ret;
7010 basic_block bb;
7011 unsigned regno;
7012 rtx insn, pat;
7014 max_gcse_regno = max_reg_num ();
7016 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (last_basic_block,
7017 max_gcse_regno);
7018 sbitmap_vector_zero (reg_set_in_block, last_basic_block);
7019 pre_ldst_mems = 0;
7021 /* Find all the stores we care about. */
7022 FOR_EACH_BB (bb)
7024 regvec = & (reg_set_in_block[bb->index]);
7025 for (insn = bb->end;
7026 insn && insn != PREV_INSN (bb->head);
7027 insn = PREV_INSN (insn))
7029 /* Ignore anything that is not a normal insn. */
7030 if (! INSN_P (insn))
7031 continue;
7033 if (GET_CODE (insn) == CALL_INSN)
7035 bool clobbers_all = false;
7036 #ifdef NON_SAVING_SETJMP
7037 if (NON_SAVING_SETJMP
7038 && find_reg_note (insn, REG_SETJMP, NULL_RTX))
7039 clobbers_all = true;
7040 #endif
7042 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7043 if (clobbers_all
7044 || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
7045 SET_BIT (reg_set_in_block[bb->index], regno);
7048 pat = PATTERN (insn);
7049 note_stores (pat, reg_set_info, NULL);
7051 /* Now that we've marked regs, look for stores. */
7052 if (GET_CODE (pat) == SET)
7053 find_moveable_store (insn);
7057 ret = enumerate_ldsts ();
7059 if (gcse_file)
7061 fprintf (gcse_file, "Store Motion Expressions.\n");
7062 print_ldst_list (gcse_file);
7065 return ret;
7068 /* Check to see if the load X is aliased with STORE_PATTERN. */
7070 static int
7071 load_kills_store (x, store_pattern)
7072 rtx x, store_pattern;
7074 if (true_dependence (x, GET_MODE (x), store_pattern, rtx_addr_varies_p))
7075 return 1;
7076 return 0;
7079 /* Go through the entire insn X, looking for any loads which might alias
7080 STORE_PATTERN. Return 1 if found. */
7082 static int
7083 find_loads (x, store_pattern)
7084 rtx x, store_pattern;
7086 const char * fmt;
7087 int i, j;
7088 int ret = 0;
7090 if (!x)
7091 return 0;
7093 if (GET_CODE (x) == SET)
7094 x = SET_SRC (x);
7096 if (GET_CODE (x) == MEM)
7098 if (load_kills_store (x, store_pattern))
7099 return 1;
7102 /* Recursively process the insn. */
7103 fmt = GET_RTX_FORMAT (GET_CODE (x));
7105 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0 && !ret; i--)
7107 if (fmt[i] == 'e')
7108 ret |= find_loads (XEXP (x, i), store_pattern);
7109 else if (fmt[i] == 'E')
7110 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7111 ret |= find_loads (XVECEXP (x, i, j), store_pattern);
7113 return ret;
7116 /* Check if INSN kills the store pattern X (is aliased with it).
7117 Return 1 if it does. */
7119 static int
7120 store_killed_in_insn (x, insn)
7121 rtx x, insn;
7123 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
7124 return 0;
7126 if (GET_CODE (insn) == CALL_INSN)
7128 /* A normal or pure call might read from pattern,
7129 but a const call will not. */
7130 return ! CONST_OR_PURE_CALL_P (insn) || pure_call_p (insn);
7133 if (GET_CODE (PATTERN (insn)) == SET)
7135 rtx pat = PATTERN (insn);
7136 /* Check for memory stores to aliased objects. */
7137 if (GET_CODE (SET_DEST (pat)) == MEM && !expr_equiv_p (SET_DEST (pat), x))
7138 /* Pretend it's a load and check for aliasing. */
7139 if (find_loads (SET_DEST (pat), x))
7140 return 1;
7141 return find_loads (SET_SRC (pat), x);
7143 else
7144 return find_loads (PATTERN (insn), x);
7147 /* Returns 1 if the expression X is loaded or clobbered on or after INSN
7148 within basic block BB. */
7150 static int
7151 store_killed_after (x, insn, bb)
7152 rtx x, insn;
7153 basic_block bb;
7155 rtx last = bb->end;
7157 if (insn == last)
7158 return 0;
7160 /* Check if the register operands of the store are OK in this block.
7161 Note that if registers are changed ANYWHERE in the block, we'll
7162 decide we can't move it, regardless of whether it changed above
7163 or below the store. This could be improved by checking the register
7164 operands while looking for aliasing in each insn. */
7165 if (!store_ops_ok (XEXP (x, 0), bb))
7166 return 1;
7168 for ( ; insn && insn != NEXT_INSN (last); insn = NEXT_INSN (insn))
7169 if (store_killed_in_insn (x, insn))
7170 return 1;
7172 return 0;
7175 /* Returns 1 if the expression X is loaded or clobbered on or before INSN
7176 within basic block BB. */
7177 static int
7178 store_killed_before (x, insn, bb)
7179 rtx x, insn;
7180 basic_block bb;
7182 rtx first = bb->head;
7184 if (insn == first)
7185 return store_killed_in_insn (x, insn);
7187 /* Check if the register operands of the store are OK in this block.
7188 Note that if registers are changed ANYWHERE in the block, we'll
7189 decide we can't move it, regardless of whether it changed above
7190 or below the store. This could be improved by checking the register
7191 operands while looking for aliasing in each insn. */
7192 if (!store_ops_ok (XEXP (x, 0), bb))
7193 return 1;
7195 for ( ; insn && insn != PREV_INSN (first); insn = PREV_INSN (insn))
7196 if (store_killed_in_insn (x, insn))
7197 return 1;
7199 return 0;
7202 #define ANTIC_STORE_LIST(x) ((x)->loads)
7203 #define AVAIL_STORE_LIST(x) ((x)->stores)
7205 /* Given the table of available store insns at the end of blocks,
7206 determine which ones are not killed by aliasing, and generate
7207 the appropriate vectors for gen and killed. */
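/* In other words (a rough summary of the loops below): ae_gen[b] holds the
   stores whose value still reaches the end of B, st_antloc[b] holds the
   stores not killed between the start of B and the store itself, transp[b]
   holds the store expressions that are neither loaded nor clobbered
   anywhere in B, and ae_kill[b] is the complement of transp[b].  These
   vectors feed pre_edge_rev_lcm, which runs LCM on the reverse
   flowgraph.  */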
7208 static void
7209 build_store_vectors ()
7211 basic_block bb, b;
7212 rtx insn, st;
7213 struct ls_expr * ptr;
7215 /* Build the gen_vector. This is any store in the table which is not killed
7216 by aliasing later in its block. */
7217 ae_gen = (sbitmap *) sbitmap_vector_alloc (last_basic_block, num_stores);
7218 sbitmap_vector_zero (ae_gen, last_basic_block);
7220 st_antloc = (sbitmap *) sbitmap_vector_alloc (last_basic_block, num_stores);
7221 sbitmap_vector_zero (st_antloc, last_basic_block);
7223 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
7225 /* Put all the stores into either the antic list, or the avail list,
7226 or both. */
7227 rtx store_list = ptr->stores;
7228 ptr->stores = NULL_RTX;
7230 for (st = store_list; st != NULL; st = XEXP (st, 1))
7232 insn = XEXP (st, 0);
7233 bb = BLOCK_FOR_INSN (insn);
7235 if (!store_killed_after (ptr->pattern, insn, bb))
7237 /* If we've already seen an available expression in this block,
7238 we can delete the one we saw already (it occurs earlier in
7239 the block) and replace it with this one. We'll copy the
7240 old SRC expression to an unused register in case there
7241 are any side effects. */
7242 if (TEST_BIT (ae_gen[bb->index], ptr->index))
7244 /* Find previous store. */
7245 rtx st;
7246 for (st = AVAIL_STORE_LIST (ptr); st ; st = XEXP (st, 1))
7247 if (BLOCK_FOR_INSN (XEXP (st, 0)) == bb)
7248 break;
7249 if (st)
7251 rtx r = gen_reg_rtx (GET_MODE (ptr->pattern));
7252 if (gcse_file)
7253 fprintf (gcse_file, "Removing redundant store:\n");
7254 replace_store_insn (r, XEXP (st, 0), bb);
7255 XEXP (st, 0) = insn;
7256 continue;
7259 SET_BIT (ae_gen[bb->index], ptr->index);
7260 AVAIL_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
7261 AVAIL_STORE_LIST (ptr));
7264 if (!store_killed_before (ptr->pattern, insn, bb))
7266 SET_BIT (st_antloc[BLOCK_NUM (insn)], ptr->index);
7267 ANTIC_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
7268 ANTIC_STORE_LIST (ptr));
7272 /* Free the original list of store insns. */
7273 free_INSN_LIST_list (&store_list);
7276 ae_kill = (sbitmap *) sbitmap_vector_alloc (last_basic_block, num_stores);
7277 sbitmap_vector_zero (ae_kill, last_basic_block);
7279 transp = (sbitmap *) sbitmap_vector_alloc (last_basic_block, num_stores);
7280 sbitmap_vector_zero (transp, last_basic_block);
7282 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
7283 FOR_EACH_BB (b)
7285 if (store_killed_after (ptr->pattern, b->head, b))
7287 /* The anticipatable expression is not killed if it's gen'd. */
7288 /*
7289 We leave this check out for now. If we have a code sequence
7290 in a block which looks like:
7291 ST MEMa = x
7292 L y = MEMa
7293 ST MEMa = z
7294 We should flag this as having an ANTIC expression, NOT
7295 transparent, NOT killed, and AVAIL.
7296 Unfortunately, since we haven't re-written all loads to
7297 use the reaching reg, we'll end up doing an incorrect
7298 Load in the middle here if we push the store down. It happens in
7299 gcc.c-torture/execute/960311-1.c with -O3
7300 If we always kill it in this case, we'll sometimes do
7301 unnecessary work, but it shouldn't actually hurt anything.
7302 if (!TEST_BIT (ae_gen[b], ptr->index)). */
7303 SET_BIT (ae_kill[b->index], ptr->index);
7305 else
7306 SET_BIT (transp[b->index], ptr->index);
7309 /* Any block with no exits calls some non-returning function, so
7310 we better mark the store killed here, or we might not store to
7311 it at all. If we knew it was abort, we wouldn't have to store,
7312 but we don't know that for sure. */
7313 if (gcse_file)
7315 fprintf (gcse_file, "ST_avail and ST_antic (shown under loads..)\n");
7316 print_ldst_list (gcse_file);
7317 dump_sbitmap_vector (gcse_file, "st_antloc", "", st_antloc, last_basic_block);
7318 dump_sbitmap_vector (gcse_file, "st_kill", "", ae_kill, last_basic_block);
7319 dump_sbitmap_vector (gcse_file, "Transpt", "", transp, last_basic_block);
7320 dump_sbitmap_vector (gcse_file, "st_avloc", "", ae_gen, last_basic_block);
7324 /* Insert an instruction at the beginning of a basic block, and update
7325 the BLOCK_HEAD if needed. */
7327 static void
7328 insert_insn_start_bb (insn, bb)
7329 rtx insn;
7330 basic_block bb;
7332 /* Insert at start of successor block. */
7333 rtx prev = PREV_INSN (bb->head);
7334 rtx before = bb->head;
7335 while (before != 0)
7337 if (GET_CODE (before) != CODE_LABEL
7338 && (GET_CODE (before) != NOTE
7339 || NOTE_LINE_NUMBER (before) != NOTE_INSN_BASIC_BLOCK))
7340 break;
7341 prev = before;
7342 if (prev == bb->end)
7343 break;
7344 before = NEXT_INSN (before);
7347 insn = emit_insn_after (insn, prev);
7349 if (gcse_file)
7351 fprintf (gcse_file, "STORE_MOTION insert store at start of BB %d:\n",
7352 bb->index);
7353 print_inline_rtx (gcse_file, insn, 6);
7354 fprintf (gcse_file, "\n");
7358 /* This routine will insert a store on an edge. EXPR is the ldst entry for
7359 the memory reference, and E is the edge to insert it on. Returns nonzero
7360 if an edge insertion was performed. */
7362 static int
7363 insert_store (expr, e)
7364 struct ls_expr * expr;
7365 edge e;
7367 rtx reg, insn;
7368 basic_block bb;
7369 edge tmp;
7371 /* We did all the deletes before this insert, so if we didn't delete a
7372 store, then we haven't set the reaching reg yet either. */
7373 if (expr->reaching_reg == NULL_RTX)
7374 return 0;
7376 reg = expr->reaching_reg;
7377 insn = gen_move_insn (expr->pattern, reg);
7379 /* If we are inserting this expression on ALL predecessor edges of a BB,
7380 insert it at the start of the BB, and reset the insert bits on the other
7381 edges so we don't try to insert it on the other edges. */
7382 bb = e->dest;
7383 for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
7385 int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
7386 if (index == EDGE_INDEX_NO_EDGE)
7387 abort ();
7388 if (! TEST_BIT (pre_insert_map[index], expr->index))
7389 break;
7392 /* If tmp is NULL, we found an insertion on every edge, blank the
7393 insertion vector for these edges, and insert at the start of the BB. */
7394 if (!tmp && bb != EXIT_BLOCK_PTR)
7396 for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
7398 int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
7399 RESET_BIT (pre_insert_map[index], expr->index);
7401 insert_insn_start_bb (insn, bb);
7402 return 0;
7405 /* We can't insert on this edge, so we'll insert at the head of the
7406 successor block. See Morgan, sec 10.5. */
7407 if ((e->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
7409 insert_insn_start_bb (insn, bb);
7410 return 0;
7413 insert_insn_on_edge (insn, e);
7415 if (gcse_file)
7417 fprintf (gcse_file, "STORE_MOTION insert insn on edge (%d, %d):\n",
7418 e->src->index, e->dest->index);
7419 print_inline_rtx (gcse_file, insn, 6);
7420 fprintf (gcse_file, "\n");
7423 return 1;
7426 /* This routine will replace a store with a SET to a specified register. */
7428 static void
7429 replace_store_insn (reg, del, bb)
7430 rtx reg, del;
7431 basic_block bb;
7433 rtx insn;
7435 insn = gen_move_insn (reg, SET_SRC (PATTERN (del)));
7436 insn = emit_insn_after (insn, del);
7438 if (gcse_file)
7440 fprintf (gcse_file,
7441 "STORE_MOTION delete insn in BB %d:\n ", bb->index);
7442 print_inline_rtx (gcse_file, del, 6);
7443 fprintf (gcse_file, "\nSTORE MOTION replaced with insn:\n ");
7444 print_inline_rtx (gcse_file, insn, 6);
7445 fprintf (gcse_file, "\n");
7448 delete_insn (del);
7452 /* Delete a store, but copy the value that would have been stored into
7453 the reaching_reg for later storing. */
7455 static void
7456 delete_store (expr, bb)
7457 struct ls_expr * expr;
7458 basic_block bb;
7460 rtx reg, i, del;
7462 if (expr->reaching_reg == NULL_RTX)
7463 expr->reaching_reg = gen_reg_rtx (GET_MODE (expr->pattern));
7466 /* If there is more than 1 store, the earlier ones will be dead,
7467 but it doesn't hurt to replace them here. */
7468 reg = expr->reaching_reg;
7470 for (i = AVAIL_STORE_LIST (expr); i; i = XEXP (i, 1))
7472 del = XEXP (i, 0);
7473 if (BLOCK_FOR_INSN (del) == bb)
7475 /* We know there is only one since we deleted redundant
7476 ones during the available computation. */
7477 replace_store_insn (reg, del, bb);
7478 break;
7483 /* Free memory used by store motion. */
7485 static void
7486 free_store_memory ()
7488 free_ldst_mems ();
7490 if (ae_gen)
7491 sbitmap_vector_free (ae_gen);
7492 if (ae_kill)
7493 sbitmap_vector_free (ae_kill);
7494 if (transp)
7495 sbitmap_vector_free (transp);
7496 if (st_antloc)
7497 sbitmap_vector_free (st_antloc);
7498 if (pre_insert_map)
7499 sbitmap_vector_free (pre_insert_map);
7500 if (pre_delete_map)
7501 sbitmap_vector_free (pre_delete_map);
7502 if (reg_set_in_block)
7503 sbitmap_vector_free (reg_set_in_block);
7505 ae_gen = ae_kill = transp = st_antloc = NULL;
7506 pre_insert_map = pre_delete_map = reg_set_in_block = NULL;
7509 /* Perform store motion. Much like gcse, except we move expressions the
7510 other way by looking at the flowgraph in reverse. */
7512 static void
7513 store_motion ()
7515 basic_block bb;
7516 int x;
7517 struct ls_expr * ptr;
7518 int update_flow = 0;
7520 if (gcse_file)
7522 fprintf (gcse_file, "before store motion\n");
7523 print_rtl (gcse_file, get_insns ());
7527 init_alias_analysis ();
7529 /* Find all the stores that are live to the end of their block. */
7530 num_stores = compute_store_table ();
7531 if (num_stores == 0)
7533 sbitmap_vector_free (reg_set_in_block);
7534 end_alias_analysis ();
7535 return;
7538 /* Now compute what's actually available to move. */
7539 add_noreturn_fake_exit_edges ();
7540 build_store_vectors ();
7542 edge_list = pre_edge_rev_lcm (gcse_file, num_stores, transp, ae_gen,
7543 st_antloc, ae_kill, &pre_insert_map,
7544 &pre_delete_map);
7546 /* Now we want to insert the new stores which are going to be needed. */
7547 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
7549 FOR_EACH_BB (bb)
7550 if (TEST_BIT (pre_delete_map[bb->index], ptr->index))
7551 delete_store (ptr, bb);
7553 for (x = 0; x < NUM_EDGES (edge_list); x++)
7554 if (TEST_BIT (pre_insert_map[x], ptr->index))
7555 update_flow |= insert_store (ptr, INDEX_EDGE (edge_list, x));
7558 if (update_flow)
7559 commit_edge_insertions ();
7561 free_store_memory ();
7562 free_edge_list (edge_list);
7563 remove_fake_edges ();
7564 end_alias_analysis ();
7568 /* Entry point for jump bypassing optimization pass. */
7571 bypass_jumps (file)
7572 FILE *file;
7574 int changed;
7576 /* We do not construct an accurate cfg in functions which call
7577 setjmp, so just punt to be safe. */
7578 if (current_function_calls_setjmp)
7579 return 0;
7581 /* For calling dump_foo fns from gdb. */
7582 debug_stderr = stderr;
7583 gcse_file = file;
7585 /* Identify the basic block information for this function, including
7586 successors and predecessors. */
7587 max_gcse_regno = max_reg_num ();
7589 if (file)
7590 dump_flow_info (file);
7592 /* Return if there's nothing to do. */
7593 if (n_basic_blocks <= 1)
7594 return 0;
7596 /* Trying to perform global optimizations on flow graphs which have
7597 a high connectivity will take a long time and is unlikely to be
7598 particularly useful.
7600 In normal circumstances a cfg should have about twice as many edges
7601 as blocks. But we do not want to punish small functions which have
7602 a couple switch statements. So we require a relatively large number
7603 of basic blocks and the ratio of edges to blocks to be high. */
7604 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
7606 if (warn_disabled_optimization)
7607 warning ("BYPASS disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
7608 n_basic_blocks, n_edges / n_basic_blocks);
7609 return 0;
7612 /* If allocating memory for the cprop bitmap would take up too much
7613 storage it's better just to disable the optimization. */
7614 if ((n_basic_blocks
7615 * SBITMAP_SET_SIZE (max_gcse_regno)
7616 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
7618 if (warn_disabled_optimization)
7619 warning ("GCSE disabled: %d basic blocks and %d registers",
7620 n_basic_blocks, max_gcse_regno);
7622 return 0;
7625 /* See what modes support reg/reg copy operations. */
7626 if (! can_copy_init_p)
7628 compute_can_copy ();
7629 can_copy_init_p = 1;
7632 gcc_obstack_init (&gcse_obstack);
7633 bytes_used = 0;
7635 /* We need alias. */
7636 init_alias_analysis ();
7638 /* Record where pseudo-registers are set. This data is kept accurate
7639 during each pass. ??? We could also record hard-reg information here
7640 [since it's unchanging], however it is currently done during hash table
7641 computation.
7643 It may be tempting to compute MEM set information here too, but MEM sets
7644 will be subject to code motion one day and thus we need to compute
7645 information about memory sets when we build the hash tables. */
7647 alloc_reg_set_mem (max_gcse_regno);
7648 compute_sets (get_insns ());
7650 max_gcse_regno = max_reg_num ();
7651 alloc_gcse_mem (get_insns ());
7652 changed = one_cprop_pass (1, 1, 1);
7653 free_gcse_mem ();
7655 if (file)
7657 fprintf (file, "BYPASS of %s: %d basic blocks, ",
7658 current_function_name, n_basic_blocks);
7659 fprintf (file, "%d bytes\n\n", bytes_used);
7662 obstack_free (&gcse_obstack, NULL);
7663 free_reg_set_mem ();
7665 /* We are finished with alias. */
7666 end_alias_analysis ();
7667 allocate_reg_info (max_reg_num (), FALSE, FALSE);
7669 return changed;
7672 #include "gt-gcse.h"