/* Source: gcc/tree-ssa-tail-merge.c from official-gcc.git,
   blob 79be216569346123cdc68229e0e17eecaa626ff4.  */
1 /* Tail merging for gimple.
2 Copyright (C) 2011-2013 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Pass overview.
24 MOTIVATIONAL EXAMPLE
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
110 CONTEXT
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
157 IMPLEMENTATION
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
167 LIMITATIONS/TODO
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis, We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
179 PASS PLACEMENT
180 This 'pass' is not a stand-alone gimple pass, but runs as part of
181 pass_pre, in order to share the value numbering.
184 SWITCHES
186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "tm.h"
192 #include "tree.h"
193 #include "tm_p.h"
194 #include "basic-block.h"
195 #include "flags.h"
196 #include "function.h"
197 #include "gimple.h"
198 #include "gimple-iterator.h"
199 #include "gimple-ssa.h"
200 #include "tree-cfg.h"
201 #include "tree-phinodes.h"
202 #include "ssa-iterators.h"
203 #include "tree-into-ssa.h"
204 #include "tree-ssa-alias.h"
205 #include "params.h"
206 #include "hash-table.h"
207 #include "gimple-pretty-print.h"
208 #include "tree-ssa-sccvn.h"
209 #include "tree-dump.h"
210 #include "cfgloop.h"
211 #include "tree-pass.h"
212 #include "trans-mem.h"
214 /* Describes a group of bbs with the same successors. The successor bbs are
215 cached in succs, and the successor edge flags are cached in succ_flags.
216 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
217 it's marked in inverse.
218 Additionally, the hash value for the struct is cached in hashval, and
219 in_worklist indicates whether it's currently part of worklist. */
221 struct same_succ_def
223 /* The bbs that have the same successor bbs. */
224 bitmap bbs;
225 /* The successor bbs. */
226 bitmap succs;
227 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
228 bb. */
229 bitmap inverse;
230 /* The edge flags for each of the successor bbs. */
231 vec<int> succ_flags;
232 /* Indicates whether the struct is currently in the worklist. */
233 bool in_worklist;
234 /* The hash value of the struct. */
235 hashval_t hashval;
237 /* hash_table support. */
238 typedef same_succ_def value_type;
239 typedef same_succ_def compare_type;
240 static inline hashval_t hash (const value_type *);
241 static int equal (const value_type *, const compare_type *);
242 static void remove (value_type *);
244 typedef struct same_succ_def *same_succ;
245 typedef const struct same_succ_def *const_same_succ;
247 /* hash routine for hash_table support, returns hashval of E. */
249 inline hashval_t
250 same_succ_def::hash (const value_type *e)
252 return e->hashval;
255 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
257 struct bb_cluster_def
259 /* The bbs in the cluster. */
260 bitmap bbs;
261 /* The preds of the bbs in the cluster. */
262 bitmap preds;
263 /* Index in all_clusters vector. */
264 int index;
265 /* The bb to replace the cluster with. */
266 basic_block rep_bb;
268 typedef struct bb_cluster_def *bb_cluster;
269 typedef const struct bb_cluster_def *const_bb_cluster;
271 /* Per bb-info. */
273 struct aux_bb_info
275 /* The number of non-debug statements in the bb. */
276 int size;
277 /* The same_succ that this bb is a member of. */
278 same_succ bb_same_succ;
279 /* The cluster that this bb is a member of. */
280 bb_cluster cluster;
281 /* The vop state at the exit of a bb. This is shortlived data, used to
282 communicate data between update_block_by and update_vuses. */
283 tree vop_at_exit;
284 /* The bb that either contains or is dominated by the dependencies of the
285 bb. */
286 basic_block dep_bb;
289 /* Macros to access the fields of struct aux_bb_info. */
291 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
292 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
293 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
294 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
295 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
297 /* Returns true if the only effect a statement STMT has, is to define locally
298 used SSA_NAMEs. */
300 static bool
301 stmt_local_def (gimple stmt)
303 basic_block bb, def_bb;
304 imm_use_iterator iter;
305 use_operand_p use_p;
306 tree val;
307 def_operand_p def_p;
309 if (gimple_has_side_effects (stmt)
310 || gimple_vdef (stmt) != NULL_TREE)
311 return false;
313 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
314 if (def_p == NULL)
315 return false;
317 val = DEF_FROM_PTR (def_p);
318 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
319 return false;
321 def_bb = gimple_bb (stmt);
323 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
325 if (is_gimple_debug (USE_STMT (use_p)))
326 continue;
327 bb = gimple_bb (USE_STMT (use_p));
328 if (bb == def_bb)
329 continue;
331 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
332 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
333 continue;
335 return false;
338 return true;
341 /* Let GSI skip forwards over local defs. */
343 static void
344 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
346 gimple stmt;
348 while (true)
350 if (gsi_end_p (*gsi))
351 return;
352 stmt = gsi_stmt (*gsi);
353 if (!stmt_local_def (stmt))
354 return;
355 gsi_next_nondebug (gsi);
359 /* VAL1 and VAL2 are either:
360 - uses in BB1 and BB2, or
361 - phi alternatives for BB1 and BB2.
362 Return true if the uses have the same gvn value. */
364 static bool
365 gvn_uses_equal (tree val1, tree val2)
367 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
369 if (val1 == val2)
370 return true;
372 if (vn_valueize (val1) != vn_valueize (val2))
373 return false;
375 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
376 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
379 /* Prints E to FILE. */
381 static void
382 same_succ_print (FILE *file, const same_succ e)
384 unsigned int i;
385 bitmap_print (file, e->bbs, "bbs:", "\n");
386 bitmap_print (file, e->succs, "succs:", "\n");
387 bitmap_print (file, e->inverse, "inverse:", "\n");
388 fprintf (file, "flags:");
389 for (i = 0; i < e->succ_flags.length (); ++i)
390 fprintf (file, " %x", e->succ_flags[i]);
391 fprintf (file, "\n");
394 /* Prints same_succ VE to VFILE. */
396 inline int
397 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
399 const same_succ e = *pe;
400 same_succ_print (file, e);
401 return 1;
404 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
406 static void
407 update_dep_bb (basic_block use_bb, tree val)
409 basic_block dep_bb;
411 /* Not a dep. */
412 if (TREE_CODE (val) != SSA_NAME)
413 return;
415 /* Skip use of global def. */
416 if (SSA_NAME_IS_DEFAULT_DEF (val))
417 return;
419 /* Skip use of local def. */
420 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
421 if (dep_bb == use_bb)
422 return;
424 if (BB_DEP_BB (use_bb) == NULL
425 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
426 BB_DEP_BB (use_bb) = dep_bb;
429 /* Update BB_DEP_BB, given the dependencies in STMT. */
431 static void
432 stmt_update_dep_bb (gimple stmt)
434 ssa_op_iter iter;
435 use_operand_p use;
437 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
438 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
441 /* Calculates hash value for same_succ VE. */
443 static hashval_t
444 same_succ_hash (const_same_succ e)
446 hashval_t hashval = bitmap_hash (e->succs);
447 int flags;
448 unsigned int i;
449 unsigned int first = bitmap_first_set_bit (e->bbs);
450 basic_block bb = BASIC_BLOCK (first);
451 int size = 0;
452 gimple_stmt_iterator gsi;
453 gimple stmt;
454 tree arg;
455 unsigned int s;
456 bitmap_iterator bs;
458 for (gsi = gsi_start_nondebug_bb (bb);
459 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
461 stmt = gsi_stmt (gsi);
462 stmt_update_dep_bb (stmt);
463 if (stmt_local_def (stmt))
464 continue;
465 size++;
467 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
468 if (is_gimple_assign (stmt))
469 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
470 hashval);
471 if (!is_gimple_call (stmt))
472 continue;
473 if (gimple_call_internal_p (stmt))
474 hashval = iterative_hash_hashval_t
475 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
476 else
477 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
478 for (i = 0; i < gimple_call_num_args (stmt); i++)
480 arg = gimple_call_arg (stmt, i);
481 arg = vn_valueize (arg);
482 hashval = iterative_hash_expr (arg, hashval);
486 hashval = iterative_hash_hashval_t (size, hashval);
487 BB_SIZE (bb) = size;
489 for (i = 0; i < e->succ_flags.length (); ++i)
491 flags = e->succ_flags[i];
492 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
493 hashval = iterative_hash_hashval_t (flags, hashval);
496 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
498 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
499 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
500 gsi_next (&gsi))
502 gimple phi = gsi_stmt (gsi);
503 tree lhs = gimple_phi_result (phi);
504 tree val = gimple_phi_arg_def (phi, n);
506 if (virtual_operand_p (lhs))
507 continue;
508 update_dep_bb (bb, val);
512 return hashval;
515 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
516 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
517 the other edge flags. */
519 static bool
520 inverse_flags (const_same_succ e1, const_same_succ e2)
522 int f1a, f1b, f2a, f2b;
523 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
525 if (e1->succ_flags.length () != 2)
526 return false;
528 f1a = e1->succ_flags[0];
529 f1b = e1->succ_flags[1];
530 f2a = e2->succ_flags[0];
531 f2b = e2->succ_flags[1];
533 if (f1a == f2a && f1b == f2b)
534 return false;
536 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
539 /* Compares SAME_SUCCs E1 and E2. */
542 same_succ_def::equal (const value_type *e1, const compare_type *e2)
544 unsigned int i, first1, first2;
545 gimple_stmt_iterator gsi1, gsi2;
546 gimple s1, s2;
547 basic_block bb1, bb2;
549 if (e1->hashval != e2->hashval)
550 return 0;
552 if (e1->succ_flags.length () != e2->succ_flags.length ())
553 return 0;
555 if (!bitmap_equal_p (e1->succs, e2->succs))
556 return 0;
558 if (!inverse_flags (e1, e2))
560 for (i = 0; i < e1->succ_flags.length (); ++i)
561 if (e1->succ_flags[i] != e1->succ_flags[i])
562 return 0;
565 first1 = bitmap_first_set_bit (e1->bbs);
566 first2 = bitmap_first_set_bit (e2->bbs);
568 bb1 = BASIC_BLOCK (first1);
569 bb2 = BASIC_BLOCK (first2);
571 if (BB_SIZE (bb1) != BB_SIZE (bb2))
572 return 0;
574 gsi1 = gsi_start_nondebug_bb (bb1);
575 gsi2 = gsi_start_nondebug_bb (bb2);
576 gsi_advance_fw_nondebug_nonlocal (&gsi1);
577 gsi_advance_fw_nondebug_nonlocal (&gsi2);
578 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
580 s1 = gsi_stmt (gsi1);
581 s2 = gsi_stmt (gsi2);
582 if (gimple_code (s1) != gimple_code (s2))
583 return 0;
584 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
585 return 0;
586 gsi_next_nondebug (&gsi1);
587 gsi_next_nondebug (&gsi2);
588 gsi_advance_fw_nondebug_nonlocal (&gsi1);
589 gsi_advance_fw_nondebug_nonlocal (&gsi2);
592 return 1;
595 /* Alloc and init a new SAME_SUCC. */
597 static same_succ
598 same_succ_alloc (void)
600 same_succ same = XNEW (struct same_succ_def);
602 same->bbs = BITMAP_ALLOC (NULL);
603 same->succs = BITMAP_ALLOC (NULL);
604 same->inverse = BITMAP_ALLOC (NULL);
605 same->succ_flags.create (10);
606 same->in_worklist = false;
608 return same;
611 /* Delete same_succ E. */
613 void
614 same_succ_def::remove (same_succ e)
616 BITMAP_FREE (e->bbs);
617 BITMAP_FREE (e->succs);
618 BITMAP_FREE (e->inverse);
619 e->succ_flags.release ();
621 XDELETE (e);
624 /* Reset same_succ SAME. */
626 static void
627 same_succ_reset (same_succ same)
629 bitmap_clear (same->bbs);
630 bitmap_clear (same->succs);
631 bitmap_clear (same->inverse);
632 same->succ_flags.truncate (0);
635 static hash_table <same_succ_def> same_succ_htab;
637 /* Array that is used to store the edge flags for a successor. */
639 static int *same_succ_edge_flags;
641 /* Bitmap that is used to mark bbs that are recently deleted. */
643 static bitmap deleted_bbs;
645 /* Bitmap that is used to mark predecessors of bbs that are
646 deleted. */
648 static bitmap deleted_bb_preds;
650 /* Prints same_succ_htab to stderr. */
652 extern void debug_same_succ (void);
653 DEBUG_FUNCTION void
654 debug_same_succ ( void)
656 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
660 /* Vector of bbs to process. */
662 static vec<same_succ> worklist;
664 /* Prints worklist to FILE. */
666 static void
667 print_worklist (FILE *file)
669 unsigned int i;
670 for (i = 0; i < worklist.length (); ++i)
671 same_succ_print (file, worklist[i]);
674 /* Adds SAME to worklist. */
676 static void
677 add_to_worklist (same_succ same)
679 if (same->in_worklist)
680 return;
682 if (bitmap_count_bits (same->bbs) < 2)
683 return;
685 same->in_worklist = true;
686 worklist.safe_push (same);
689 /* Add BB to same_succ_htab. */
691 static void
692 find_same_succ_bb (basic_block bb, same_succ *same_p)
694 unsigned int j;
695 bitmap_iterator bj;
696 same_succ same = *same_p;
697 same_succ *slot;
698 edge_iterator ei;
699 edge e;
701 if (bb == NULL
702 /* Be conservative with loop structure. It's not evident that this test
703 is sufficient. Before tail-merge, we've just called
704 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
705 set, so there's no guarantee that the loop->latch value is still valid.
706 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
707 start of pre, we've kept that property intact throughout pre, and are
708 keeping it throughout tail-merge using this test. */
709 || bb->loop_father->latch == bb)
710 return;
711 bitmap_set_bit (same->bbs, bb->index);
712 FOR_EACH_EDGE (e, ei, bb->succs)
714 int index = e->dest->index;
715 bitmap_set_bit (same->succs, index);
716 same_succ_edge_flags[index] = e->flags;
718 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
719 same->succ_flags.safe_push (same_succ_edge_flags[j]);
721 same->hashval = same_succ_hash (same);
723 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
724 if (*slot == NULL)
726 *slot = same;
727 BB_SAME_SUCC (bb) = same;
728 add_to_worklist (same);
729 *same_p = NULL;
731 else
733 bitmap_set_bit ((*slot)->bbs, bb->index);
734 BB_SAME_SUCC (bb) = *slot;
735 add_to_worklist (*slot);
736 if (inverse_flags (same, *slot))
737 bitmap_set_bit ((*slot)->inverse, bb->index);
738 same_succ_reset (same);
742 /* Find bbs with same successors. */
744 static void
745 find_same_succ (void)
747 same_succ same = same_succ_alloc ();
748 basic_block bb;
750 FOR_EACH_BB (bb)
752 find_same_succ_bb (bb, &same);
753 if (same == NULL)
754 same = same_succ_alloc ();
757 same_succ_def::remove (same);
760 /* Initializes worklist administration. */
762 static void
763 init_worklist (void)
765 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
766 same_succ_htab.create (n_basic_blocks_for_fn (cfun));
767 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
768 deleted_bbs = BITMAP_ALLOC (NULL);
769 deleted_bb_preds = BITMAP_ALLOC (NULL);
770 worklist.create (n_basic_blocks_for_fn (cfun));
771 find_same_succ ();
773 if (dump_file && (dump_flags & TDF_DETAILS))
775 fprintf (dump_file, "initial worklist:\n");
776 print_worklist (dump_file);
780 /* Deletes worklist administration. */
782 static void
783 delete_worklist (void)
785 free_aux_for_blocks ();
786 same_succ_htab.dispose ();
787 XDELETEVEC (same_succ_edge_flags);
788 same_succ_edge_flags = NULL;
789 BITMAP_FREE (deleted_bbs);
790 BITMAP_FREE (deleted_bb_preds);
791 worklist.release ();
794 /* Mark BB as deleted, and mark its predecessors. */
796 static void
797 mark_basic_block_deleted (basic_block bb)
799 edge e;
800 edge_iterator ei;
802 bitmap_set_bit (deleted_bbs, bb->index);
804 FOR_EACH_EDGE (e, ei, bb->preds)
805 bitmap_set_bit (deleted_bb_preds, e->src->index);
808 /* Removes BB from its corresponding same_succ. */
810 static void
811 same_succ_flush_bb (basic_block bb)
813 same_succ same = BB_SAME_SUCC (bb);
814 BB_SAME_SUCC (bb) = NULL;
815 if (bitmap_single_bit_set_p (same->bbs))
816 same_succ_htab.remove_elt_with_hash (same, same->hashval);
817 else
818 bitmap_clear_bit (same->bbs, bb->index);
821 /* Removes all bbs in BBS from their corresponding same_succ. */
823 static void
824 same_succ_flush_bbs (bitmap bbs)
826 unsigned int i;
827 bitmap_iterator bi;
829 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
830 same_succ_flush_bb (BASIC_BLOCK (i));
833 /* Release the last vdef in BB, either normal or phi result. */
835 static void
836 release_last_vdef (basic_block bb)
838 gimple_stmt_iterator i;
840 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
842 gimple stmt = gsi_stmt (i);
843 if (gimple_vdef (stmt) == NULL_TREE)
844 continue;
846 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
847 return;
850 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
852 gimple phi = gsi_stmt (i);
853 tree res = gimple_phi_result (phi);
855 if (!virtual_operand_p (res))
856 continue;
858 mark_virtual_phi_result_for_renaming (phi);
859 return;
864 /* For deleted_bb_preds, find bbs with same successors. */
866 static void
867 update_worklist (void)
869 unsigned int i;
870 bitmap_iterator bi;
871 basic_block bb;
872 same_succ same;
874 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
875 bitmap_clear (deleted_bbs);
877 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
878 same_succ_flush_bbs (deleted_bb_preds);
880 same = same_succ_alloc ();
881 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
883 bb = BASIC_BLOCK (i);
884 gcc_assert (bb != NULL);
885 find_same_succ_bb (bb, &same);
886 if (same == NULL)
887 same = same_succ_alloc ();
889 same_succ_def::remove (same);
890 bitmap_clear (deleted_bb_preds);
893 /* Prints cluster C to FILE. */
895 static void
896 print_cluster (FILE *file, bb_cluster c)
898 if (c == NULL)
899 return;
900 bitmap_print (file, c->bbs, "bbs:", "\n");
901 bitmap_print (file, c->preds, "preds:", "\n");
904 /* Prints cluster C to stderr. */
906 extern void debug_cluster (bb_cluster);
907 DEBUG_FUNCTION void
908 debug_cluster (bb_cluster c)
910 print_cluster (stderr, c);
913 /* Update C->rep_bb, given that BB is added to the cluster. */
915 static void
916 update_rep_bb (bb_cluster c, basic_block bb)
918 /* Initial. */
919 if (c->rep_bb == NULL)
921 c->rep_bb = bb;
922 return;
925 /* Current needs no deps, keep it. */
926 if (BB_DEP_BB (c->rep_bb) == NULL)
927 return;
929 /* Bb needs no deps, change rep_bb. */
930 if (BB_DEP_BB (bb) == NULL)
932 c->rep_bb = bb;
933 return;
936 /* Bb needs last deps earlier than current, change rep_bb. A potential
937 problem with this, is that the first deps might also be earlier, which
938 would mean we prefer longer lifetimes for the deps. To be able to check
939 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
940 BB_DEP_BB, which is really BB_LAST_DEP_BB.
941 The benefit of choosing the bb with last deps earlier, is that it can
942 potentially be used as replacement for more bbs. */
943 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
944 c->rep_bb = bb;
947 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
949 static void
950 add_bb_to_cluster (bb_cluster c, basic_block bb)
952 edge e;
953 edge_iterator ei;
955 bitmap_set_bit (c->bbs, bb->index);
957 FOR_EACH_EDGE (e, ei, bb->preds)
958 bitmap_set_bit (c->preds, e->src->index);
960 update_rep_bb (c, bb);
963 /* Allocate and init new cluster. */
965 static bb_cluster
966 new_cluster (void)
968 bb_cluster c;
969 c = XCNEW (struct bb_cluster_def);
970 c->bbs = BITMAP_ALLOC (NULL);
971 c->preds = BITMAP_ALLOC (NULL);
972 c->rep_bb = NULL;
973 return c;
976 /* Delete clusters. */
978 static void
979 delete_cluster (bb_cluster c)
981 if (c == NULL)
982 return;
983 BITMAP_FREE (c->bbs);
984 BITMAP_FREE (c->preds);
985 XDELETE (c);
989 /* Array that contains all clusters. */
991 static vec<bb_cluster> all_clusters;
993 /* Allocate all cluster vectors. */
995 static void
996 alloc_cluster_vectors (void)
998 all_clusters.create (n_basic_blocks_for_fn (cfun));
1001 /* Reset all cluster vectors. */
1003 static void
1004 reset_cluster_vectors (void)
1006 unsigned int i;
1007 basic_block bb;
1008 for (i = 0; i < all_clusters.length (); ++i)
1009 delete_cluster (all_clusters[i]);
1010 all_clusters.truncate (0);
1011 FOR_EACH_BB (bb)
1012 BB_CLUSTER (bb) = NULL;
1015 /* Delete all cluster vectors. */
1017 static void
1018 delete_cluster_vectors (void)
1020 unsigned int i;
1021 for (i = 0; i < all_clusters.length (); ++i)
1022 delete_cluster (all_clusters[i]);
1023 all_clusters.release ();
1026 /* Merge cluster C2 into C1. */
1028 static void
1029 merge_clusters (bb_cluster c1, bb_cluster c2)
1031 bitmap_ior_into (c1->bbs, c2->bbs);
1032 bitmap_ior_into (c1->preds, c2->preds);
1035 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1036 all_clusters, or merge c with existing cluster. */
1038 static void
1039 set_cluster (basic_block bb1, basic_block bb2)
1041 basic_block merge_bb, other_bb;
1042 bb_cluster merge, old, c;
1044 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1046 c = new_cluster ();
1047 add_bb_to_cluster (c, bb1);
1048 add_bb_to_cluster (c, bb2);
1049 BB_CLUSTER (bb1) = c;
1050 BB_CLUSTER (bb2) = c;
1051 c->index = all_clusters.length ();
1052 all_clusters.safe_push (c);
1054 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1056 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1057 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1058 merge = BB_CLUSTER (merge_bb);
1059 add_bb_to_cluster (merge, other_bb);
1060 BB_CLUSTER (other_bb) = merge;
1062 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1064 unsigned int i;
1065 bitmap_iterator bi;
1067 old = BB_CLUSTER (bb2);
1068 merge = BB_CLUSTER (bb1);
1069 merge_clusters (merge, old);
1070 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1071 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
1072 all_clusters[old->index] = NULL;
1073 update_rep_bb (merge, old->rep_bb);
1074 delete_cluster (old);
1076 else
1077 gcc_unreachable ();
1080 /* Return true if gimple operands T1 and T2 have the same value. */
1082 static bool
1083 gimple_operand_equal_value_p (tree t1, tree t2)
1085 if (t1 == t2)
1086 return true;
1088 if (t1 == NULL_TREE
1089 || t2 == NULL_TREE)
1090 return false;
1092 if (operand_equal_p (t1, t2, 0))
1093 return true;
1095 return gvn_uses_equal (t1, t2);
1098 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1099 gimple_bb (s2) are members of SAME_SUCC. */
1101 static bool
1102 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1104 unsigned int i;
1105 tree lhs1, lhs2;
1106 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1107 tree t1, t2;
1108 bool inv_cond;
1109 enum tree_code code1, code2;
1111 if (gimple_code (s1) != gimple_code (s2))
1112 return false;
1114 switch (gimple_code (s1))
1116 case GIMPLE_CALL:
1117 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1118 return false;
1119 if (!gimple_call_same_target_p (s1, s2))
1120 return false;
1122 for (i = 0; i < gimple_call_num_args (s1); ++i)
1124 t1 = gimple_call_arg (s1, i);
1125 t2 = gimple_call_arg (s2, i);
1126 if (gimple_operand_equal_value_p (t1, t2))
1127 continue;
1128 return false;
1131 lhs1 = gimple_get_lhs (s1);
1132 lhs2 = gimple_get_lhs (s2);
1133 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1134 return true;
1135 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1136 return false;
1137 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1138 return vn_valueize (lhs1) == vn_valueize (lhs2);
1139 return operand_equal_p (lhs1, lhs2, 0);
1141 case GIMPLE_ASSIGN:
1142 lhs1 = gimple_get_lhs (s1);
1143 lhs2 = gimple_get_lhs (s2);
1144 if (TREE_CODE (lhs1) != SSA_NAME
1145 && TREE_CODE (lhs2) != SSA_NAME)
1147 /* If the vdef is the same, it's the same statement. */
1148 if (vn_valueize (gimple_vdef (s1))
1149 == vn_valueize (gimple_vdef (s2)))
1150 return true;
1152 /* Test for structural equality. */
1153 return (operand_equal_p (lhs1, lhs2, 0)
1154 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1155 gimple_assign_rhs1 (s2)));
1157 else if (TREE_CODE (lhs1) == SSA_NAME
1158 && TREE_CODE (lhs2) == SSA_NAME)
1159 return vn_valueize (lhs1) == vn_valueize (lhs2);
1160 return false;
1162 case GIMPLE_COND:
1163 t1 = gimple_cond_lhs (s1);
1164 t2 = gimple_cond_lhs (s2);
1165 if (!gimple_operand_equal_value_p (t1, t2))
1166 return false;
1168 t1 = gimple_cond_rhs (s1);
1169 t2 = gimple_cond_rhs (s2);
1170 if (!gimple_operand_equal_value_p (t1, t2))
1171 return false;
1173 code1 = gimple_expr_code (s1);
1174 code2 = gimple_expr_code (s2);
1175 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1176 != bitmap_bit_p (same_succ->inverse, bb2->index));
1177 if (inv_cond)
1179 bool honor_nans
1180 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1181 code2 = invert_tree_comparison (code2, honor_nans);
1183 return code1 == code2;
1185 default:
1186 return false;
1190 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1191 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1192 processed statements. */
1194 static void
1195 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1196 bool *vuse_escaped)
1198 gimple stmt;
1199 tree lvuse;
1201 while (true)
1203 if (gsi_end_p (*gsi))
1204 return;
1205 stmt = gsi_stmt (*gsi);
1207 lvuse = gimple_vuse (stmt);
1208 if (lvuse != NULL_TREE)
1210 *vuse = lvuse;
1211 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1212 *vuse_escaped = true;
1215 if (!stmt_local_def (stmt))
1216 return;
1217 gsi_prev_nondebug (gsi);
1221 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1222 clusters them. */
1224 static void
1225 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1227 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1228 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1229 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1230 bool vuse_escaped = false;
1232 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1233 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1235 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1237 gimple stmt1 = gsi_stmt (gsi1);
1238 gimple stmt2 = gsi_stmt (gsi2);
1240 /* What could be better than to this this here is to blacklist the bb
1241 containing the stmt, when encountering the stmt f.i. in
1242 same_succ_hash. */
1243 if (is_tm_ending (stmt1)
1244 || is_tm_ending (stmt2))
1245 return;
1247 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1248 return;
1250 gsi_prev_nondebug (&gsi1);
1251 gsi_prev_nondebug (&gsi2);
1252 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1253 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1256 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1257 return;
1259 /* If the incoming vuses are not the same, and the vuse escaped into an
1260 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1261 which potentially means the semantics of one of the blocks will be changed.
1262 TODO: make this check more precise. */
1263 if (vuse_escaped && vuse1 != vuse2)
1264 return;
1266 if (dump_file)
1267 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1268 bb1->index, bb2->index);
1270 set_cluster (bb1, bb2);
1273 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1274 E2 are equal. */
1276 static bool
1277 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1279 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1280 gimple_stmt_iterator gsi;
1282 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1284 gimple phi = gsi_stmt (gsi);
1285 tree lhs = gimple_phi_result (phi);
1286 tree val1 = gimple_phi_arg_def (phi, n1);
1287 tree val2 = gimple_phi_arg_def (phi, n2);
1289 if (virtual_operand_p (lhs))
1290 continue;
1292 if (operand_equal_for_phi_arg_p (val1, val2))
1293 continue;
1294 if (gvn_uses_equal (val1, val2))
1295 continue;
1297 return false;
1300 return true;
1303 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1304 phi alternatives for BB1 and BB2 are equal. */
1306 static bool
1307 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1309 unsigned int s;
1310 bitmap_iterator bs;
1311 edge e1, e2;
1312 basic_block succ;
1314 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1316 succ = BASIC_BLOCK (s);
1317 e1 = find_edge (bb1, succ);
1318 e2 = find_edge (bb2, succ);
1319 if (e1->flags & EDGE_COMPLEX
1320 || e2->flags & EDGE_COMPLEX)
1321 return false;
1323 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1324 the same value. */
1325 if (!same_phi_alternatives_1 (succ, e1, e2))
1326 return false;
1329 return true;
1332 /* Return true if BB has non-vop phis. */
1334 static bool
1335 bb_has_non_vop_phi (basic_block bb)
1337 gimple_seq phis = phi_nodes (bb);
1338 gimple phi;
1340 if (phis == NULL)
1341 return false;
1343 if (!gimple_seq_singleton_p (phis))
1344 return true;
1346 phi = gimple_seq_first_stmt (phis);
1347 return !virtual_operand_p (gimple_phi_result (phi));
1350 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1351 invariant that uses in FROM are dominates by their defs. */
1353 static bool
1354 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1356 basic_block cd, dep_bb = BB_DEP_BB (to);
1357 edge_iterator ei;
1358 edge e;
1359 bitmap from_preds = BITMAP_ALLOC (NULL);
1361 if (dep_bb == NULL)
1362 return true;
1364 FOR_EACH_EDGE (e, ei, from->preds)
1365 bitmap_set_bit (from_preds, e->src->index);
1366 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1367 BITMAP_FREE (from_preds);
1369 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1372 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1373 replacement bb) and vice versa maintains the invariant that uses in the
1374 replacement are dominates by their defs. */
1376 static bool
1377 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1379 if (BB_CLUSTER (bb1) != NULL)
1380 bb1 = BB_CLUSTER (bb1)->rep_bb;
1382 if (BB_CLUSTER (bb2) != NULL)
1383 bb2 = BB_CLUSTER (bb2)->rep_bb;
1385 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1386 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1389 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1391 static void
1392 find_clusters_1 (same_succ same_succ)
1394 basic_block bb1, bb2;
1395 unsigned int i, j;
1396 bitmap_iterator bi, bj;
1397 int nr_comparisons;
1398 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1400 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1402 bb1 = BASIC_BLOCK (i);
1404 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1405 phi-nodes in bb1 and bb2, with the same alternatives for the same
1406 preds. */
1407 if (bb_has_non_vop_phi (bb1))
1408 continue;
1410 nr_comparisons = 0;
1411 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1413 bb2 = BASIC_BLOCK (j);
1415 if (bb_has_non_vop_phi (bb2))
1416 continue;
1418 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1419 continue;
1421 /* Limit quadratic behaviour. */
1422 nr_comparisons++;
1423 if (nr_comparisons > max_comparisons)
1424 break;
1426 /* This is a conservative dependency check. We could test more
1427 precise for allowed replacement direction. */
1428 if (!deps_ok_for_redirect (bb1, bb2))
1429 continue;
1431 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1432 continue;
1434 find_duplicate (same_succ, bb1, bb2);
1439 /* Find clusters of bbs which can be merged. */
1441 static void
1442 find_clusters (void)
1444 same_succ same;
1446 while (!worklist.is_empty ())
1448 same = worklist.pop ();
1449 same->in_worklist = false;
1450 if (dump_file && (dump_flags & TDF_DETAILS))
1452 fprintf (dump_file, "processing worklist entry\n");
1453 same_succ_print (dump_file, same);
1455 find_clusters_1 (same);
1459 /* Returns the vop phi of BB, if any. */
1461 static gimple
1462 vop_phi (basic_block bb)
1464 gimple stmt;
1465 gimple_stmt_iterator gsi;
1466 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1468 stmt = gsi_stmt (gsi);
1469 if (! virtual_operand_p (gimple_phi_result (stmt)))
1470 continue;
1471 return stmt;
1473 return NULL;
1476 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1478 static void
1479 replace_block_by (basic_block bb1, basic_block bb2)
1481 edge pred_edge;
1482 edge e1, e2;
1483 edge_iterator ei;
1484 unsigned int i;
1485 gimple bb2_phi;
1487 bb2_phi = vop_phi (bb2);
1489 /* Mark the basic block as deleted. */
1490 mark_basic_block_deleted (bb1);
1492 /* Redirect the incoming edges of bb1 to bb2. */
1493 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1495 pred_edge = EDGE_PRED (bb1, i - 1);
1496 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1497 gcc_assert (pred_edge != NULL);
1499 if (bb2_phi == NULL)
1500 continue;
1502 /* The phi might have run out of capacity when the redirect added an
1503 argument, which means it could have been replaced. Refresh it. */
1504 bb2_phi = vop_phi (bb2);
1506 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1507 pred_edge, UNKNOWN_LOCATION);
1510 bb2->frequency += bb1->frequency;
1511 if (bb2->frequency > BB_FREQ_MAX)
1512 bb2->frequency = BB_FREQ_MAX;
1514 bb2->count += bb1->count;
1516 /* Merge the outgoing edge counts from bb1 onto bb2. */
1517 gcov_type out_sum = 0;
1518 FOR_EACH_EDGE (e1, ei, bb1->succs)
1520 e2 = find_edge (bb2, e1->dest);
1521 gcc_assert (e2);
1522 e2->count += e1->count;
1523 out_sum += e2->count;
1525 /* Recompute the edge probabilities from the new merged edge count.
1526 Use the sum of the new merged edge counts computed above instead
1527 of bb2's merged count, in case there are profile count insanities
1528 making the bb count inconsistent with the edge weights. */
1529 FOR_EACH_EDGE (e2, ei, bb2->succs)
1531 e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
1534 /* Do updates that use bb1, before deleting bb1. */
1535 release_last_vdef (bb1);
1536 same_succ_flush_bb (bb1);
1538 delete_basic_block (bb1);
/* Bbs for which update_debug_stmt needs to be called.  */

static bitmap update_bbs;
1545 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1546 number of bbs removed. */
1548 static int
1549 apply_clusters (void)
1551 basic_block bb1, bb2;
1552 bb_cluster c;
1553 unsigned int i, j;
1554 bitmap_iterator bj;
1555 int nr_bbs_removed = 0;
1557 for (i = 0; i < all_clusters.length (); ++i)
1559 c = all_clusters[i];
1560 if (c == NULL)
1561 continue;
1563 bb2 = c->rep_bb;
1564 bitmap_set_bit (update_bbs, bb2->index);
1566 bitmap_clear_bit (c->bbs, bb2->index);
1567 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1569 bb1 = BASIC_BLOCK (j);
1570 bitmap_clear_bit (update_bbs, bb1->index);
1572 replace_block_by (bb1, bb2);
1573 nr_bbs_removed++;
1577 return nr_bbs_removed;
1580 /* Resets debug statement STMT if it has uses that are not dominated by their
1581 defs. */
1583 static void
1584 update_debug_stmt (gimple stmt)
1586 use_operand_p use_p;
1587 ssa_op_iter oi;
1588 basic_block bbdef, bbuse;
1589 gimple def_stmt;
1590 tree name;
1592 if (!gimple_debug_bind_p (stmt))
1593 return;
1595 bbuse = gimple_bb (stmt);
1596 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1598 name = USE_FROM_PTR (use_p);
1599 gcc_assert (TREE_CODE (name) == SSA_NAME);
1601 def_stmt = SSA_NAME_DEF_STMT (name);
1602 gcc_assert (def_stmt != NULL);
1604 bbdef = gimple_bb (def_stmt);
1605 if (bbdef == NULL || bbuse == bbdef
1606 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1607 continue;
1609 gimple_debug_bind_reset_value (stmt);
1610 update_stmt (stmt);
1614 /* Resets all debug statements that have uses that are not
1615 dominated by their defs. */
1617 static void
1618 update_debug_stmts (void)
1620 basic_block bb;
1621 bitmap_iterator bi;
1622 unsigned int i;
1624 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1626 gimple stmt;
1627 gimple_stmt_iterator gsi;
1629 bb = BASIC_BLOCK (i);
1630 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1632 stmt = gsi_stmt (gsi);
1633 if (!is_gimple_debug (stmt))
1634 continue;
1635 update_debug_stmt (stmt);
/* Runs tail merge optimization.  TODO is the incoming todo-flag set; returns
   it possibly extended with verification flags when bbs were removed.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  if (!flag_tree_tail_merge
      || max_iterations == 0
      /* We try to be conservative with respect to loop structure, since:
	 - the cases where tail-merging could both affect loop structure and be
	   beneficial are rare,
	 - it prevents us from having to fixup the loops using
	   loops_state_set (LOOPS_NEED_FIXUP), and
	 - keeping loop structure may allow us to simplify the pass.
	 In order to be conservative, we need loop information.	 In rare cases
	 (about 7 test-cases in the g++ testsuite) there is none (because
	 loop_optimizer_finalize has been called before tail-merge, and
	 PROP_loops is not set), so we bail out.  */
      || current_loops == NULL)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  /* Iterate: each round finds clusters of duplicate bbs and merges them,
     which may expose new merging opportunities for the next round.  */
  while (!worklist.is_empty ())
    {
      if (!loop_entered)
	{
	  /* First iteration: set up the per-run data structures.  */
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
	break;

      /* apply_clusters deleted bbs, so the dominance info is stale.  */
      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      /* Recompute dominators and refresh the worklist for the next round.  */
      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab.collisions ());

  if (nr_bbs_removed_total > 0)
    {
      if (MAY_HAVE_DEBUG_STMTS)
	{
	  /* Debug binds may now refer to defs that no longer dominate
	     them; reset those.  */
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
	}

      todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  if (loop_entered)
    {
      /* These were only allocated once the worklist loop was entered.  */
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}