2012-09-15 Tom de Vries <tom@codesourcery.com>
[official-gcc.git] / gcc / tree-ssa-tail-merge.c
blob372096c344c4f60b5b53138580dc208d391ba717
1 /* Tail merging for gimple.
2 Copyright (C) 2011, 2012 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Pass overview.
24 MOTIVATIONAL EXAMPLE
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
110 CONTEXT
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
For efficient implementation, we would like to value number the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
For the first situation (same operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
157 IMPLEMENTATION
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
167 LIMITATIONS/TODO
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
- no insertion of gimple_reg phis; we only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
179 SWITCHES
181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
183 #include "config.h"
184 #include "system.h"
185 #include "coretypes.h"
186 #include "tm.h"
187 #include "tree.h"
188 #include "tm_p.h"
189 #include "basic-block.h"
190 #include "flags.h"
191 #include "function.h"
192 #include "tree-flow.h"
193 #include "bitmap.h"
194 #include "tree-ssa-alias.h"
195 #include "params.h"
196 #include "hash-table.h"
197 #include "gimple-pretty-print.h"
198 #include "tree-ssa-sccvn.h"
199 #include "tree-dump.h"
201 /* ??? This currently runs as part of tree-ssa-pre. Why is this not
202 a stand-alone GIMPLE pass? */
203 #include "tree-pass.h"
205 /* Describes a group of bbs with the same successors. The successor bbs are
206 cached in succs, and the successor edge flags are cached in succ_flags.
If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
208 it's marked in inverse.
209 Additionally, the hash value for the struct is cached in hashval, and
210 in_worklist indicates whether it's currently part of worklist. */
212 struct same_succ_def
214 /* The bbs that have the same successor bbs. */
215 bitmap bbs;
216 /* The successor bbs. */
217 bitmap succs;
218 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
219 bb. */
220 bitmap inverse;
221 /* The edge flags for each of the successor bbs. */
222 VEC (int, heap) *succ_flags;
223 /* Indicates whether the struct is currently in the worklist. */
224 bool in_worklist;
225 /* The hash value of the struct. */
226 hashval_t hashval;
228 /* hash_table support. */
229 typedef same_succ_def T;
230 static inline hashval_t hash (const same_succ_def *);
231 static int equal (const same_succ_def *, const same_succ_def *);
232 static void remove (same_succ_def *);
234 typedef struct same_succ_def *same_succ;
235 typedef const struct same_succ_def *const_same_succ;
237 /* hash routine for hash_table support, returns hashval of E. */
239 inline hashval_t
240 same_succ_def::hash (const same_succ_def *e)
242 return e->hashval;
245 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
247 struct bb_cluster_def
249 /* The bbs in the cluster. */
250 bitmap bbs;
251 /* The preds of the bbs in the cluster. */
252 bitmap preds;
253 /* Index in all_clusters vector. */
254 int index;
255 /* The bb to replace the cluster with. */
256 basic_block rep_bb;
258 typedef struct bb_cluster_def *bb_cluster;
259 typedef const struct bb_cluster_def *const_bb_cluster;
261 /* Per bb-info. */
263 struct aux_bb_info
265 /* The number of non-debug statements in the bb. */
266 int size;
267 /* The same_succ that this bb is a member of. */
268 same_succ bb_same_succ;
269 /* The cluster that this bb is a member of. */
270 bb_cluster cluster;
271 /* The vop state at the exit of a bb. This is shortlived data, used to
272 communicate data between update_block_by and update_vuses. */
273 tree vop_at_exit;
274 /* The bb that either contains or is dominated by the dependencies of the
275 bb. */
276 basic_block dep_bb;
279 /* Macros to access the fields of struct aux_bb_info. */
281 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
282 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
283 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
284 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
285 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
287 /* Returns true if the only effect a statement STMT has, is to define locally
288 used SSA_NAMEs. */
290 static bool
291 stmt_local_def (gimple stmt)
293 basic_block bb, def_bb;
294 imm_use_iterator iter;
295 use_operand_p use_p;
296 tree val;
297 def_operand_p def_p;
299 if (gimple_has_side_effects (stmt))
300 return false;
302 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
303 if (def_p == NULL)
304 return false;
306 val = DEF_FROM_PTR (def_p);
307 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
308 return false;
310 def_bb = gimple_bb (stmt);
312 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
314 if (is_gimple_debug (USE_STMT (use_p)))
315 continue;
316 bb = gimple_bb (USE_STMT (use_p));
317 if (bb == def_bb)
318 continue;
320 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
321 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
322 continue;
324 return false;
327 return true;
330 /* Let GSI skip forwards over local defs. */
332 static void
333 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
335 gimple stmt;
337 while (true)
339 if (gsi_end_p (*gsi))
340 return;
341 stmt = gsi_stmt (*gsi);
342 if (!stmt_local_def (stmt))
343 return;
344 gsi_next_nondebug (gsi);
348 /* VAL1 and VAL2 are either:
349 - uses in BB1 and BB2, or
350 - phi alternatives for BB1 and BB2.
351 Return true if the uses have the same gvn value. */
353 static bool
354 gvn_uses_equal (tree val1, tree val2)
356 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
358 if (val1 == val2)
359 return true;
361 if (vn_valueize (val1) != vn_valueize (val2))
362 return false;
364 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
365 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
368 /* Prints E to FILE. */
370 static void
371 same_succ_print (FILE *file, const same_succ e)
373 unsigned int i;
374 bitmap_print (file, e->bbs, "bbs:", "\n");
375 bitmap_print (file, e->succs, "succs:", "\n");
376 bitmap_print (file, e->inverse, "inverse:", "\n");
377 fprintf (file, "flags:");
378 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
379 fprintf (file, " %x", VEC_index (int, e->succ_flags, i));
380 fprintf (file, "\n");
383 /* Prints same_succ VE to VFILE. */
385 inline int
386 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
388 const same_succ e = *pe;
389 same_succ_print (file, e);
390 return 1;
393 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
395 static void
396 update_dep_bb (basic_block use_bb, tree val)
398 basic_block dep_bb;
400 /* Not a dep. */
401 if (TREE_CODE (val) != SSA_NAME)
402 return;
404 /* Skip use of global def. */
405 if (SSA_NAME_IS_DEFAULT_DEF (val))
406 return;
408 /* Skip use of local def. */
409 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
410 if (dep_bb == use_bb)
411 return;
413 if (BB_DEP_BB (use_bb) == NULL
414 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
415 BB_DEP_BB (use_bb) = dep_bb;
418 /* Update BB_DEP_BB, given the dependencies in STMT. */
420 static void
421 stmt_update_dep_bb (gimple stmt)
423 ssa_op_iter iter;
424 use_operand_p use;
426 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
427 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
430 /* Calculates hash value for same_succ VE. */
432 static hashval_t
433 same_succ_hash (const_same_succ e)
435 hashval_t hashval = bitmap_hash (e->succs);
436 int flags;
437 unsigned int i;
438 unsigned int first = bitmap_first_set_bit (e->bbs);
439 basic_block bb = BASIC_BLOCK (first);
440 int size = 0;
441 gimple_stmt_iterator gsi;
442 gimple stmt;
443 tree arg;
444 unsigned int s;
445 bitmap_iterator bs;
447 for (gsi = gsi_start_nondebug_bb (bb);
448 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
450 stmt = gsi_stmt (gsi);
451 stmt_update_dep_bb (stmt);
452 if (stmt_local_def (stmt))
453 continue;
454 size++;
456 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
457 if (is_gimple_assign (stmt))
458 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
459 hashval);
460 if (!is_gimple_call (stmt))
461 continue;
462 if (gimple_call_internal_p (stmt))
463 hashval = iterative_hash_hashval_t
464 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
465 else
466 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
467 for (i = 0; i < gimple_call_num_args (stmt); i++)
469 arg = gimple_call_arg (stmt, i);
470 arg = vn_valueize (arg);
471 hashval = iterative_hash_expr (arg, hashval);
475 hashval = iterative_hash_hashval_t (size, hashval);
476 BB_SIZE (bb) = size;
478 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
480 flags = VEC_index (int, e->succ_flags, i);
481 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
482 hashval = iterative_hash_hashval_t (flags, hashval);
485 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
487 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
488 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
489 gsi_next (&gsi))
491 gimple phi = gsi_stmt (gsi);
492 tree lhs = gimple_phi_result (phi);
493 tree val = gimple_phi_arg_def (phi, n);
495 if (virtual_operand_p (lhs))
496 continue;
497 update_dep_bb (bb, val);
501 return hashval;
504 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
505 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
506 the other edge flags. */
508 static bool
509 inverse_flags (const_same_succ e1, const_same_succ e2)
511 int f1a, f1b, f2a, f2b;
512 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
514 if (VEC_length (int, e1->succ_flags) != 2)
515 return false;
517 f1a = VEC_index (int, e1->succ_flags, 0);
518 f1b = VEC_index (int, e1->succ_flags, 1);
519 f2a = VEC_index (int, e2->succ_flags, 0);
520 f2b = VEC_index (int, e2->succ_flags, 1);
522 if (f1a == f2a && f1b == f2b)
523 return false;
525 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
528 /* Compares SAME_SUCCs E1 and E2. */
531 same_succ_def::equal (const_same_succ e1, const_same_succ e2)
533 unsigned int i, first1, first2;
534 gimple_stmt_iterator gsi1, gsi2;
535 gimple s1, s2;
536 basic_block bb1, bb2;
538 if (e1->hashval != e2->hashval)
539 return 0;
541 if (VEC_length (int, e1->succ_flags) != VEC_length (int, e2->succ_flags))
542 return 0;
544 if (!bitmap_equal_p (e1->succs, e2->succs))
545 return 0;
547 if (!inverse_flags (e1, e2))
549 for (i = 0; i < VEC_length (int, e1->succ_flags); ++i)
550 if (VEC_index (int, e1->succ_flags, i)
551 != VEC_index (int, e1->succ_flags, i))
552 return 0;
555 first1 = bitmap_first_set_bit (e1->bbs);
556 first2 = bitmap_first_set_bit (e2->bbs);
558 bb1 = BASIC_BLOCK (first1);
559 bb2 = BASIC_BLOCK (first2);
561 if (BB_SIZE (bb1) != BB_SIZE (bb2))
562 return 0;
564 gsi1 = gsi_start_nondebug_bb (bb1);
565 gsi2 = gsi_start_nondebug_bb (bb2);
566 gsi_advance_fw_nondebug_nonlocal (&gsi1);
567 gsi_advance_fw_nondebug_nonlocal (&gsi2);
568 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
570 s1 = gsi_stmt (gsi1);
571 s2 = gsi_stmt (gsi2);
572 if (gimple_code (s1) != gimple_code (s2))
573 return 0;
574 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
575 return 0;
576 gsi_next_nondebug (&gsi1);
577 gsi_next_nondebug (&gsi2);
578 gsi_advance_fw_nondebug_nonlocal (&gsi1);
579 gsi_advance_fw_nondebug_nonlocal (&gsi2);
582 return 1;
585 /* Alloc and init a new SAME_SUCC. */
587 static same_succ
588 same_succ_alloc (void)
590 same_succ same = XNEW (struct same_succ_def);
592 same->bbs = BITMAP_ALLOC (NULL);
593 same->succs = BITMAP_ALLOC (NULL);
594 same->inverse = BITMAP_ALLOC (NULL);
595 same->succ_flags = VEC_alloc (int, heap, 10);
596 same->in_worklist = false;
598 return same;
601 /* Delete same_succ E. */
603 void
604 same_succ_def::remove (same_succ e)
606 BITMAP_FREE (e->bbs);
607 BITMAP_FREE (e->succs);
608 BITMAP_FREE (e->inverse);
609 VEC_free (int, heap, e->succ_flags);
611 XDELETE (e);
614 /* Reset same_succ SAME. */
616 static void
617 same_succ_reset (same_succ same)
619 bitmap_clear (same->bbs);
620 bitmap_clear (same->succs);
621 bitmap_clear (same->inverse);
622 VEC_truncate (int, same->succ_flags, 0);
625 static hash_table <same_succ_def> same_succ_htab;
627 /* Array that is used to store the edge flags for a successor. */
629 static int *same_succ_edge_flags;
631 /* Bitmap that is used to mark bbs that are recently deleted. */
633 static bitmap deleted_bbs;
635 /* Bitmap that is used to mark predecessors of bbs that are
636 deleted. */
638 static bitmap deleted_bb_preds;
640 /* Prints same_succ_htab to stderr. */
642 extern void debug_same_succ (void);
643 DEBUG_FUNCTION void
644 debug_same_succ ( void)
646 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
649 DEF_VEC_P (same_succ);
650 DEF_VEC_ALLOC_P (same_succ, heap);
652 /* Vector of bbs to process. */
654 static VEC (same_succ, heap) *worklist;
656 /* Prints worklist to FILE. */
658 static void
659 print_worklist (FILE *file)
661 unsigned int i;
662 for (i = 0; i < VEC_length (same_succ, worklist); ++i)
663 same_succ_print (file, VEC_index (same_succ, worklist, i));
666 /* Adds SAME to worklist. */
668 static void
669 add_to_worklist (same_succ same)
671 if (same->in_worklist)
672 return;
674 if (bitmap_count_bits (same->bbs) < 2)
675 return;
677 same->in_worklist = true;
678 VEC_safe_push (same_succ, heap, worklist, same);
681 /* Add BB to same_succ_htab. */
683 static void
684 find_same_succ_bb (basic_block bb, same_succ *same_p)
686 unsigned int j;
687 bitmap_iterator bj;
688 same_succ same = *same_p;
689 same_succ *slot;
690 edge_iterator ei;
691 edge e;
693 if (bb == NULL)
694 return;
695 bitmap_set_bit (same->bbs, bb->index);
696 FOR_EACH_EDGE (e, ei, bb->succs)
698 int index = e->dest->index;
699 bitmap_set_bit (same->succs, index);
700 same_succ_edge_flags[index] = e->flags;
702 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
703 VEC_safe_push (int, heap, same->succ_flags, same_succ_edge_flags[j]);
705 same->hashval = same_succ_hash (same);
707 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
708 if (*slot == NULL)
710 *slot = same;
711 BB_SAME_SUCC (bb) = same;
712 add_to_worklist (same);
713 *same_p = NULL;
715 else
717 bitmap_set_bit ((*slot)->bbs, bb->index);
718 BB_SAME_SUCC (bb) = *slot;
719 add_to_worklist (*slot);
720 if (inverse_flags (same, *slot))
721 bitmap_set_bit ((*slot)->inverse, bb->index);
722 same_succ_reset (same);
726 /* Find bbs with same successors. */
728 static void
729 find_same_succ (void)
731 same_succ same = same_succ_alloc ();
732 basic_block bb;
734 FOR_EACH_BB (bb)
736 find_same_succ_bb (bb, &same);
737 if (same == NULL)
738 same = same_succ_alloc ();
741 same_succ_def::remove (same);
744 /* Initializes worklist administration. */
746 static void
747 init_worklist (void)
749 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
750 same_succ_htab.create (n_basic_blocks);
751 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
752 deleted_bbs = BITMAP_ALLOC (NULL);
753 deleted_bb_preds = BITMAP_ALLOC (NULL);
754 worklist = VEC_alloc (same_succ, heap, n_basic_blocks);
755 find_same_succ ();
757 if (dump_file && (dump_flags & TDF_DETAILS))
759 fprintf (dump_file, "initial worklist:\n");
760 print_worklist (dump_file);
764 /* Deletes worklist administration. */
766 static void
767 delete_worklist (void)
769 free_aux_for_blocks ();
770 same_succ_htab.dispose ();
771 XDELETEVEC (same_succ_edge_flags);
772 same_succ_edge_flags = NULL;
773 BITMAP_FREE (deleted_bbs);
774 BITMAP_FREE (deleted_bb_preds);
775 VEC_free (same_succ, heap, worklist);
778 /* Mark BB as deleted, and mark its predecessors. */
780 static void
781 mark_basic_block_deleted (basic_block bb)
783 edge e;
784 edge_iterator ei;
786 bitmap_set_bit (deleted_bbs, bb->index);
788 FOR_EACH_EDGE (e, ei, bb->preds)
789 bitmap_set_bit (deleted_bb_preds, e->src->index);
792 /* Removes BB from its corresponding same_succ. */
794 static void
795 same_succ_flush_bb (basic_block bb)
797 same_succ same = BB_SAME_SUCC (bb);
798 BB_SAME_SUCC (bb) = NULL;
799 if (bitmap_single_bit_set_p (same->bbs))
800 same_succ_htab.remove_elt_with_hash (same, same->hashval);
801 else
802 bitmap_clear_bit (same->bbs, bb->index);
805 /* Removes all bbs in BBS from their corresponding same_succ. */
807 static void
808 same_succ_flush_bbs (bitmap bbs)
810 unsigned int i;
811 bitmap_iterator bi;
813 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
814 same_succ_flush_bb (BASIC_BLOCK (i));
817 /* Release the last vdef in BB, either normal or phi result. */
819 static void
820 release_last_vdef (basic_block bb)
822 gimple_stmt_iterator i;
824 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
826 gimple stmt = gsi_stmt (i);
827 if (gimple_vdef (stmt) == NULL_TREE)
828 continue;
830 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
831 return;
834 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
836 gimple phi = gsi_stmt (i);
837 tree res = gimple_phi_result (phi);
839 if (!virtual_operand_p (res))
840 continue;
842 mark_virtual_phi_result_for_renaming (phi);
843 return;
848 /* For deleted_bb_preds, find bbs with same successors. */
850 static void
851 update_worklist (void)
853 unsigned int i;
854 bitmap_iterator bi;
855 basic_block bb;
856 same_succ same;
858 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
859 bitmap_clear (deleted_bbs);
861 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
862 same_succ_flush_bbs (deleted_bb_preds);
864 same = same_succ_alloc ();
865 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
867 bb = BASIC_BLOCK (i);
868 gcc_assert (bb != NULL);
869 find_same_succ_bb (bb, &same);
870 if (same == NULL)
871 same = same_succ_alloc ();
873 same_succ_def::remove (same);
874 bitmap_clear (deleted_bb_preds);
877 /* Prints cluster C to FILE. */
879 static void
880 print_cluster (FILE *file, bb_cluster c)
882 if (c == NULL)
883 return;
884 bitmap_print (file, c->bbs, "bbs:", "\n");
885 bitmap_print (file, c->preds, "preds:", "\n");
888 /* Prints cluster C to stderr. */
890 extern void debug_cluster (bb_cluster);
891 DEBUG_FUNCTION void
892 debug_cluster (bb_cluster c)
894 print_cluster (stderr, c);
897 /* Update C->rep_bb, given that BB is added to the cluster. */
899 static void
900 update_rep_bb (bb_cluster c, basic_block bb)
902 /* Initial. */
903 if (c->rep_bb == NULL)
905 c->rep_bb = bb;
906 return;
909 /* Current needs no deps, keep it. */
910 if (BB_DEP_BB (c->rep_bb) == NULL)
911 return;
913 /* Bb needs no deps, change rep_bb. */
914 if (BB_DEP_BB (bb) == NULL)
916 c->rep_bb = bb;
917 return;
920 /* Bb needs last deps earlier than current, change rep_bb. A potential
921 problem with this, is that the first deps might also be earlier, which
922 would mean we prefer longer lifetimes for the deps. To be able to check
923 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
924 BB_DEP_BB, which is really BB_LAST_DEP_BB.
925 The benefit of choosing the bb with last deps earlier, is that it can
926 potentially be used as replacement for more bbs. */
927 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
928 c->rep_bb = bb;
931 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
933 static void
934 add_bb_to_cluster (bb_cluster c, basic_block bb)
936 edge e;
937 edge_iterator ei;
939 bitmap_set_bit (c->bbs, bb->index);
941 FOR_EACH_EDGE (e, ei, bb->preds)
942 bitmap_set_bit (c->preds, e->src->index);
944 update_rep_bb (c, bb);
947 /* Allocate and init new cluster. */
949 static bb_cluster
950 new_cluster (void)
952 bb_cluster c;
953 c = XCNEW (struct bb_cluster_def);
954 c->bbs = BITMAP_ALLOC (NULL);
955 c->preds = BITMAP_ALLOC (NULL);
956 c->rep_bb = NULL;
957 return c;
960 /* Delete clusters. */
962 static void
963 delete_cluster (bb_cluster c)
965 if (c == NULL)
966 return;
967 BITMAP_FREE (c->bbs);
968 BITMAP_FREE (c->preds);
969 XDELETE (c);
972 DEF_VEC_P (bb_cluster);
973 DEF_VEC_ALLOC_P (bb_cluster, heap);
975 /* Array that contains all clusters. */
977 static VEC (bb_cluster, heap) *all_clusters;
979 /* Allocate all cluster vectors. */
981 static void
982 alloc_cluster_vectors (void)
984 all_clusters = VEC_alloc (bb_cluster, heap, n_basic_blocks);
987 /* Reset all cluster vectors. */
989 static void
990 reset_cluster_vectors (void)
992 unsigned int i;
993 basic_block bb;
994 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
995 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
996 VEC_truncate (bb_cluster, all_clusters, 0);
997 FOR_EACH_BB (bb)
998 BB_CLUSTER (bb) = NULL;
1001 /* Delete all cluster vectors. */
1003 static void
1004 delete_cluster_vectors (void)
1006 unsigned int i;
1007 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1008 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
1009 VEC_free (bb_cluster, heap, all_clusters);
1012 /* Merge cluster C2 into C1. */
1014 static void
1015 merge_clusters (bb_cluster c1, bb_cluster c2)
1017 bitmap_ior_into (c1->bbs, c2->bbs);
1018 bitmap_ior_into (c1->preds, c2->preds);
1021 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1022 all_clusters, or merge c with existing cluster. */
1024 static void
1025 set_cluster (basic_block bb1, basic_block bb2)
1027 basic_block merge_bb, other_bb;
1028 bb_cluster merge, old, c;
1030 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1032 c = new_cluster ();
1033 add_bb_to_cluster (c, bb1);
1034 add_bb_to_cluster (c, bb2);
1035 BB_CLUSTER (bb1) = c;
1036 BB_CLUSTER (bb2) = c;
1037 c->index = VEC_length (bb_cluster, all_clusters);
1038 VEC_safe_push (bb_cluster, heap, all_clusters, c);
1040 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1042 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1043 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1044 merge = BB_CLUSTER (merge_bb);
1045 add_bb_to_cluster (merge, other_bb);
1046 BB_CLUSTER (other_bb) = merge;
1048 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1050 unsigned int i;
1051 bitmap_iterator bi;
1053 old = BB_CLUSTER (bb2);
1054 merge = BB_CLUSTER (bb1);
1055 merge_clusters (merge, old);
1056 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1057 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
1058 VEC_replace (bb_cluster, all_clusters, old->index, NULL);
1059 update_rep_bb (merge, old->rep_bb);
1060 delete_cluster (old);
1062 else
1063 gcc_unreachable ();
1066 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1067 gimple_bb (s2) are members of SAME_SUCC. */
1069 static bool
1070 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1072 unsigned int i;
1073 tree lhs1, lhs2;
1074 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1075 tree t1, t2;
1076 bool equal, inv_cond;
1077 enum tree_code code1, code2;
1079 if (gimple_code (s1) != gimple_code (s2))
1080 return false;
1082 switch (gimple_code (s1))
1084 case GIMPLE_CALL:
1085 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1086 return false;
1087 if (!gimple_call_same_target_p (s1, s2))
1088 return false;
1090 /* Eventually, we'll significantly complicate the CFG by adding
1091 back edges to properly model the effects of transaction restart.
1092 For the bulk of optimization this does not matter, but what we
1093 cannot recover from is tail merging blocks between two separate
1094 transactions. Avoid that by making commit not match. */
1095 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
1096 return false;
1098 equal = true;
1099 for (i = 0; i < gimple_call_num_args (s1); ++i)
1101 t1 = gimple_call_arg (s1, i);
1102 t2 = gimple_call_arg (s2, i);
1103 if (operand_equal_p (t1, t2, 0))
1104 continue;
1105 if (gvn_uses_equal (t1, t2))
1106 continue;
1107 equal = false;
1108 break;
1110 if (!equal)
1111 return false;
1113 lhs1 = gimple_get_lhs (s1);
1114 lhs2 = gimple_get_lhs (s2);
1115 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1116 return true;
1117 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1118 return false;
1119 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1120 return vn_valueize (lhs1) == vn_valueize (lhs2);
1121 return operand_equal_p (lhs1, lhs2, 0);
1123 case GIMPLE_ASSIGN:
1124 lhs1 = gimple_get_lhs (s1);
1125 lhs2 = gimple_get_lhs (s2);
1126 if (gimple_vdef (s1))
1128 if (vn_valueize (gimple_vdef (s1)) != vn_valueize (gimple_vdef (s2)))
1129 return false;
1130 if (TREE_CODE (lhs1) != SSA_NAME
1131 && TREE_CODE (lhs2) != SSA_NAME)
1132 return true;
1134 return (TREE_CODE (lhs1) == SSA_NAME
1135 && TREE_CODE (lhs2) == SSA_NAME
1136 && vn_valueize (lhs1) == vn_valueize (lhs2));
1138 case GIMPLE_COND:
1139 t1 = gimple_cond_lhs (s1);
1140 t2 = gimple_cond_lhs (s2);
1141 if (!operand_equal_p (t1, t2, 0)
1142 && !gvn_uses_equal (t1, t2))
1143 return false;
1145 t1 = gimple_cond_rhs (s1);
1146 t2 = gimple_cond_rhs (s2);
1147 if (!operand_equal_p (t1, t2, 0)
1148 && !gvn_uses_equal (t1, t2))
1149 return false;
1151 code1 = gimple_expr_code (s1);
1152 code2 = gimple_expr_code (s2);
1153 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1154 != bitmap_bit_p (same_succ->inverse, bb2->index));
1155 if (inv_cond)
1157 bool honor_nans
1158 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1159 code2 = invert_tree_comparison (code2, honor_nans);
1161 return code1 == code2;
1163 default:
1164 return false;
1168 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1169 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1170 processed statements. */
1172 static void
1173 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1174 bool *vuse_escaped)
1176 gimple stmt;
1177 tree lvuse;
1179 while (true)
1181 if (gsi_end_p (*gsi))
1182 return;
1183 stmt = gsi_stmt (*gsi);
1185 lvuse = gimple_vuse (stmt);
1186 if (lvuse != NULL_TREE)
1188 *vuse = lvuse;
1189 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1190 *vuse_escaped = true;
1193 if (!stmt_local_def (stmt))
1194 return;
1195 gsi_prev_nondebug (gsi);
1199 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1200 clusters them. */
1202 static void
1203 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1205 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1206 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1207 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1208 bool vuse_escaped = false;
1210 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1211 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1213 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1215 if (!gimple_equal_p (same_succ, gsi_stmt (gsi1), gsi_stmt (gsi2)))
1216 return;
1218 gsi_prev_nondebug (&gsi1);
1219 gsi_prev_nondebug (&gsi2);
1220 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1221 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1224 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1225 return;
1227 /* If the incoming vuses are not the same, and the vuse escaped into an
1228 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1229 which potentially means the semantics of one of the blocks will be changed.
1230 TODO: make this check more precise. */
1231 if (vuse_escaped && vuse1 != vuse2)
1232 return;
1234 if (dump_file)
1235 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1236 bb1->index, bb2->index);
1238 set_cluster (bb1, bb2);
1241 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1242 E2 are equal. */
1244 static bool
1245 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1247 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1248 gimple_stmt_iterator gsi;
1250 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1252 gimple phi = gsi_stmt (gsi);
1253 tree lhs = gimple_phi_result (phi);
1254 tree val1 = gimple_phi_arg_def (phi, n1);
1255 tree val2 = gimple_phi_arg_def (phi, n2);
1257 if (virtual_operand_p (lhs))
1258 continue;
1260 if (operand_equal_for_phi_arg_p (val1, val2))
1261 continue;
1262 if (gvn_uses_equal (val1, val2))
1263 continue;
1265 return false;
1268 return true;
1271 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1272 phi alternatives for BB1 and BB2 are equal. */
1274 static bool
1275 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1277 unsigned int s;
1278 bitmap_iterator bs;
1279 edge e1, e2;
1280 basic_block succ;
1282 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1284 succ = BASIC_BLOCK (s);
1285 e1 = find_edge (bb1, succ);
1286 e2 = find_edge (bb2, succ);
1287 if (e1->flags & EDGE_COMPLEX
1288 || e2->flags & EDGE_COMPLEX)
1289 return false;
1291 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1292 the same value. */
1293 if (!same_phi_alternatives_1 (succ, e1, e2))
1294 return false;
1297 return true;
1300 /* Return true if BB has non-vop phis. */
1302 static bool
1303 bb_has_non_vop_phi (basic_block bb)
1305 gimple_seq phis = phi_nodes (bb);
1306 gimple phi;
1308 if (phis == NULL)
1309 return false;
1311 if (!gimple_seq_singleton_p (phis))
1312 return true;
1314 phi = gimple_seq_first_stmt (phis);
1315 return !virtual_operand_p (gimple_phi_result (phi));
1318 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1319 invariant that uses in FROM are dominates by their defs. */
1321 static bool
1322 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1324 basic_block cd, dep_bb = BB_DEP_BB (to);
1325 edge_iterator ei;
1326 edge e;
1327 bitmap from_preds = BITMAP_ALLOC (NULL);
1329 if (dep_bb == NULL)
1330 return true;
1332 FOR_EACH_EDGE (e, ei, from->preds)
1333 bitmap_set_bit (from_preds, e->src->index);
1334 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1335 BITMAP_FREE (from_preds);
1337 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1340 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1341 replacement bb) and vice versa maintains the invariant that uses in the
1342 replacement are dominates by their defs. */
1344 static bool
1345 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1347 if (BB_CLUSTER (bb1) != NULL)
1348 bb1 = BB_CLUSTER (bb1)->rep_bb;
1350 if (BB_CLUSTER (bb2) != NULL)
1351 bb2 = BB_CLUSTER (bb2)->rep_bb;
1353 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1354 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */

static void
find_clusters_1 (same_succ same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  /* Compare each bb in the set pairwise against the bbs after it; the
     inner iteration starts at i + 1 so each unordered pair is tried once.  */
  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK (i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
	 phi-nodes in bb1 and bb2, with the same alternatives for the same
	 preds.  */
      if (bb_has_non_vop_phi (bb1))
	continue;

      nr_comparisons = 0;
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
	{
	  bb2 = BASIC_BLOCK (j);

	  if (bb_has_non_vop_phi (bb2))
	    continue;

	  /* Already in the same cluster; nothing to do.  */
	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
	    continue;

	  /* Limit quadratic behaviour.  The counter is per bb1; once the
	     budget is exhausted we give up on the remaining candidates
	     and move on to the next bb1.  */
	  nr_comparisons++;
	  if (nr_comparisons > max_comparisons)
	    break;

	  /* This is a conservative dependency check.  We could test more
	     precise for allowed replacement direction.  */
	  if (!deps_ok_for_redirect (bb1, bb2))
	    continue;

	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
	    continue;

	  /* All cheap checks passed; do the statement-by-statement
	     comparison and cluster the pair if it succeeds.  */
	  find_duplicate (same_succ, bb1, bb2);
	}
    }
}
1407 /* Find clusters of bbs which can be merged. */
1409 static void
1410 find_clusters (void)
1412 same_succ same;
1414 while (!VEC_empty (same_succ, worklist))
1416 same = VEC_pop (same_succ, worklist);
1417 same->in_worklist = false;
1418 if (dump_file && (dump_flags & TDF_DETAILS))
1420 fprintf (dump_file, "processing worklist entry\n");
1421 same_succ_print (dump_file, same);
1423 find_clusters_1 (same);
1427 /* Returns the vop phi of BB, if any. */
1429 static gimple
1430 vop_phi (basic_block bb)
1432 gimple stmt;
1433 gimple_stmt_iterator gsi;
1434 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1436 stmt = gsi_stmt (gsi);
1437 if (! virtual_operand_p (gimple_phi_result (stmt)))
1438 continue;
1439 return stmt;
1441 return NULL;
/* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  unsigned int i;
  gimple bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  Iterate downwards from the
     current edge count, since each redirect removes the edge from
     bb1->preds.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      /* No vop phi in bb2, so no phi argument to add for this edge.  */
      if (bb2_phi == NULL)
	continue;

      /* The phi might have run out of capacity when the redirect added an
	 argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
		   pred_edge, UNKNOWN_LOCATION);
    }

  /* Accumulate bb1's frequency into bb2, saturating at BB_FREQ_MAX.  */
  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;
  bb1->frequency = 0;

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}
/* Bbs for which update_debug_stmt needs to be called; set in apply_clusters
   and walked by update_debug_stmts.  */

static bitmap update_bbs;
1492 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1493 number of bbs removed. */
1495 static int
1496 apply_clusters (void)
1498 basic_block bb1, bb2;
1499 bb_cluster c;
1500 unsigned int i, j;
1501 bitmap_iterator bj;
1502 int nr_bbs_removed = 0;
1504 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1506 c = VEC_index (bb_cluster, all_clusters, i);
1507 if (c == NULL)
1508 continue;
1510 bb2 = c->rep_bb;
1511 bitmap_set_bit (update_bbs, bb2->index);
1513 bitmap_clear_bit (c->bbs, bb2->index);
1514 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1516 bb1 = BASIC_BLOCK (j);
1517 bitmap_clear_bit (update_bbs, bb1->index);
1519 replace_block_by (bb1, bb2);
1520 nr_bbs_removed++;
1524 return nr_bbs_removed;
1527 /* Resets debug statement STMT if it has uses that are not dominated by their
1528 defs. */
1530 static void
1531 update_debug_stmt (gimple stmt)
1533 use_operand_p use_p;
1534 ssa_op_iter oi;
1535 basic_block bbdef, bbuse;
1536 gimple def_stmt;
1537 tree name;
1539 if (!gimple_debug_bind_p (stmt))
1540 return;
1542 bbuse = gimple_bb (stmt);
1543 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1545 name = USE_FROM_PTR (use_p);
1546 gcc_assert (TREE_CODE (name) == SSA_NAME);
1548 def_stmt = SSA_NAME_DEF_STMT (name);
1549 gcc_assert (def_stmt != NULL);
1551 bbdef = gimple_bb (def_stmt);
1552 if (bbdef == NULL || bbuse == bbdef
1553 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1554 continue;
1556 gimple_debug_bind_reset_value (stmt);
1557 update_stmt (stmt);
1561 /* Resets all debug statements that have uses that are not
1562 dominated by their defs. */
1564 static void
1565 update_debug_stmts (void)
1567 basic_block bb;
1568 bitmap_iterator bi;
1569 unsigned int i;
1571 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1573 gimple stmt;
1574 gimple_stmt_iterator gsi;
1576 bb = BASIC_BLOCK (i);
1577 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1579 stmt = gsi_stmt (gsi);
1580 if (!is_gimple_debug (stmt))
1581 continue;
1582 update_debug_stmt (stmt);
/* Runs tail merge optimization.  Returns TODO with verification flags added
   when any bbs were removed.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  /* Nothing to do if the pass is disabled or the iteration budget is zero.  */
  if (!flag_tree_tail_merge || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  calculate_dominance_info (CDI_DOMINATORS);
  init_worklist ();

  /* Iterate: find clusters of duplicate bbs and merge them, until either no
     clusters are found, merging removes nothing, or the iteration limit is
     reached.  */
  while (!VEC_empty (same_succ, worklist))
    {
      /* Allocate the per-run state lazily on the first iteration; on later
	 iterations just reset the cluster vectors.  */
      if (!loop_entered)
	{
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (VEC_empty (same_succ, worklist));
      if (VEC_empty (bb_cluster, all_clusters))
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
	break;

      /* apply_clusters changed the cfg, so the dominance info is stale.  */
      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      /* Recompute dominators and refill the worklist for the next round.  */
      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab.collisions ());

  if (nr_bbs_removed_total > 0)
    {
      /* Merging may have left debug binds whose uses are no longer dominated
	 by their defs; reset those.  */
      if (MAY_HAVE_DEBUG_STMTS)
	{
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
	}

      todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
      mark_virtual_operands_for_renaming (cfun);
    }

  /* Release per-run state; update_bbs and the cluster vectors exist only if
     the main loop was entered at least once.  */
  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}