Mark ChangeLog
[official-gcc.git] / gcc / tree-ssa-tail-merge.c
blob3f35657e398e18460b76ab7b7600d6b4cc0a9f2b
1 /* Tail merging for gimple.
2 Copyright (C) 2011-2013 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Pass overview.
24 MOTIVATIONAL EXAMPLE
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
110 CONTEXT
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
157 IMPLEMENTATION
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
167 LIMITATIONS/TODO
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175    - no insertion of gimple_reg phis.  We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
179 SWITCHES
181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
183 #include "config.h"
184 #include "system.h"
185 #include "coretypes.h"
186 #include "tm.h"
187 #include "tree.h"
188 #include "tm_p.h"
189 #include "basic-block.h"
190 #include "flags.h"
191 #include "function.h"
192 #include "tree-flow.h"
193 #include "bitmap.h"
194 #include "tree-ssa-alias.h"
195 #include "params.h"
196 #include "hash-table.h"
197 #include "gimple-pretty-print.h"
198 #include "tree-ssa-sccvn.h"
199 #include "tree-dump.h"
201 /* ??? This currently runs as part of tree-ssa-pre. Why is this not
202 a stand-alone GIMPLE pass? */
203 #include "tree-pass.h"
205 /* Describes a group of bbs with the same successors. The successor bbs are
206 cached in succs, and the successor edge flags are cached in succ_flags.
207    If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
208 it's marked in inverse.
209 Additionally, the hash value for the struct is cached in hashval, and
210 in_worklist indicates whether it's currently part of worklist. */
212 struct same_succ_def
214 /* The bbs that have the same successor bbs. */
215 bitmap bbs;
216 /* The successor bbs. */
217 bitmap succs;
218 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
219 bb. */
220 bitmap inverse;
221 /* The edge flags for each of the successor bbs. */
222 vec<int> succ_flags;
223 /* Indicates whether the struct is currently in the worklist. */
224 bool in_worklist;
225 /* The hash value of the struct. */
226 hashval_t hashval;
228 /* hash_table support. */
229 typedef same_succ_def value_type;
230 typedef same_succ_def compare_type;
231 static inline hashval_t hash (const value_type *);
232 static int equal (const value_type *, const compare_type *);
233 static void remove (value_type *);
235 typedef struct same_succ_def *same_succ;
236 typedef const struct same_succ_def *const_same_succ;
238 /* hash routine for hash_table support, returns hashval of E. */
240 inline hashval_t
241 same_succ_def::hash (const value_type *e)
243 return e->hashval;
246 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
248 struct bb_cluster_def
250 /* The bbs in the cluster. */
251 bitmap bbs;
252 /* The preds of the bbs in the cluster. */
253 bitmap preds;
254 /* Index in all_clusters vector. */
255 int index;
256 /* The bb to replace the cluster with. */
257 basic_block rep_bb;
259 typedef struct bb_cluster_def *bb_cluster;
260 typedef const struct bb_cluster_def *const_bb_cluster;
262 /* Per bb-info. */
264 struct aux_bb_info
266 /* The number of non-debug statements in the bb. */
267 int size;
268 /* The same_succ that this bb is a member of. */
269 same_succ bb_same_succ;
270 /* The cluster that this bb is a member of. */
271 bb_cluster cluster;
272 /* The vop state at the exit of a bb. This is shortlived data, used to
273 communicate data between update_block_by and update_vuses. */
274 tree vop_at_exit;
275 /* The bb that either contains or is dominated by the dependencies of the
276 bb. */
277 basic_block dep_bb;
280 /* Macros to access the fields of struct aux_bb_info. */
282 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
283 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
284 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
285 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
286 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
288 /* Returns true if the only effect a statement STMT has, is to define locally
289 used SSA_NAMEs. */
291 static bool
292 stmt_local_def (gimple stmt)
294 basic_block bb, def_bb;
295 imm_use_iterator iter;
296 use_operand_p use_p;
297 tree val;
298 def_operand_p def_p;
300 if (gimple_has_side_effects (stmt)
301 || gimple_vdef (stmt) != NULL_TREE
302 || gimple_vuse (stmt) != NULL_TREE)
303 return false;
305 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
306 if (def_p == NULL)
307 return false;
309 val = DEF_FROM_PTR (def_p);
310 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
311 return false;
313 def_bb = gimple_bb (stmt);
315 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
317 if (is_gimple_debug (USE_STMT (use_p)))
318 continue;
319 bb = gimple_bb (USE_STMT (use_p));
320 if (bb == def_bb)
321 continue;
323 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
324 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
325 continue;
327 return false;
330 return true;
333 /* Let GSI skip forwards over local defs. */
335 static void
336 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
338 gimple stmt;
340 while (true)
342 if (gsi_end_p (*gsi))
343 return;
344 stmt = gsi_stmt (*gsi);
345 if (!stmt_local_def (stmt))
346 return;
347 gsi_next_nondebug (gsi);
351 /* VAL1 and VAL2 are either:
352 - uses in BB1 and BB2, or
353 - phi alternatives for BB1 and BB2.
354 Return true if the uses have the same gvn value. */
356 static bool
357 gvn_uses_equal (tree val1, tree val2)
359 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
361 if (val1 == val2)
362 return true;
364 if (vn_valueize (val1) != vn_valueize (val2))
365 return false;
367 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
368 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
371 /* Prints E to FILE. */
373 static void
374 same_succ_print (FILE *file, const same_succ e)
376 unsigned int i;
377 bitmap_print (file, e->bbs, "bbs:", "\n");
378 bitmap_print (file, e->succs, "succs:", "\n");
379 bitmap_print (file, e->inverse, "inverse:", "\n");
380 fprintf (file, "flags:");
381 for (i = 0; i < e->succ_flags.length (); ++i)
382 fprintf (file, " %x", e->succ_flags[i]);
383 fprintf (file, "\n");
386 /* Prints same_succ VE to VFILE. */
388 inline int
389 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
391 const same_succ e = *pe;
392 same_succ_print (file, e);
393 return 1;
396 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
398 static void
399 update_dep_bb (basic_block use_bb, tree val)
401 basic_block dep_bb;
403 /* Not a dep. */
404 if (TREE_CODE (val) != SSA_NAME)
405 return;
407 /* Skip use of global def. */
408 if (SSA_NAME_IS_DEFAULT_DEF (val))
409 return;
411 /* Skip use of local def. */
412 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
413 if (dep_bb == use_bb)
414 return;
416 if (BB_DEP_BB (use_bb) == NULL
417 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
418 BB_DEP_BB (use_bb) = dep_bb;
421 /* Update BB_DEP_BB, given the dependencies in STMT. */
423 static void
424 stmt_update_dep_bb (gimple stmt)
426 ssa_op_iter iter;
427 use_operand_p use;
429 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
430 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
433 /* Calculates hash value for same_succ VE. */
435 static hashval_t
436 same_succ_hash (const_same_succ e)
438 hashval_t hashval = bitmap_hash (e->succs);
439 int flags;
440 unsigned int i;
441 unsigned int first = bitmap_first_set_bit (e->bbs);
442 basic_block bb = BASIC_BLOCK (first);
443 int size = 0;
444 gimple_stmt_iterator gsi;
445 gimple stmt;
446 tree arg;
447 unsigned int s;
448 bitmap_iterator bs;
450 for (gsi = gsi_start_nondebug_bb (bb);
451 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
453 stmt = gsi_stmt (gsi);
454 stmt_update_dep_bb (stmt);
455 if (stmt_local_def (stmt))
456 continue;
457 size++;
459 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
460 if (is_gimple_assign (stmt))
461 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
462 hashval);
463 if (!is_gimple_call (stmt))
464 continue;
465 if (gimple_call_internal_p (stmt))
466 hashval = iterative_hash_hashval_t
467 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
468 else
469 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
470 for (i = 0; i < gimple_call_num_args (stmt); i++)
472 arg = gimple_call_arg (stmt, i);
473 arg = vn_valueize (arg);
474 hashval = iterative_hash_expr (arg, hashval);
478 hashval = iterative_hash_hashval_t (size, hashval);
479 BB_SIZE (bb) = size;
481 for (i = 0; i < e->succ_flags.length (); ++i)
483 flags = e->succ_flags[i];
484 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
485 hashval = iterative_hash_hashval_t (flags, hashval);
488 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
490 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
491 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
492 gsi_next (&gsi))
494 gimple phi = gsi_stmt (gsi);
495 tree lhs = gimple_phi_result (phi);
496 tree val = gimple_phi_arg_def (phi, n);
498 if (virtual_operand_p (lhs))
499 continue;
500 update_dep_bb (bb, val);
504 return hashval;
507 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
508 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
509 the other edge flags. */
511 static bool
512 inverse_flags (const_same_succ e1, const_same_succ e2)
514 int f1a, f1b, f2a, f2b;
515 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
517 if (e1->succ_flags.length () != 2)
518 return false;
520 f1a = e1->succ_flags[0];
521 f1b = e1->succ_flags[1];
522 f2a = e2->succ_flags[0];
523 f2b = e2->succ_flags[1];
525 if (f1a == f2a && f1b == f2b)
526 return false;
528 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
531 /* Compares SAME_SUCCs E1 and E2. */
534 same_succ_def::equal (const value_type *e1, const compare_type *e2)
536 unsigned int i, first1, first2;
537 gimple_stmt_iterator gsi1, gsi2;
538 gimple s1, s2;
539 basic_block bb1, bb2;
541 if (e1->hashval != e2->hashval)
542 return 0;
544 if (e1->succ_flags.length () != e2->succ_flags.length ())
545 return 0;
547 if (!bitmap_equal_p (e1->succs, e2->succs))
548 return 0;
550 if (!inverse_flags (e1, e2))
552 for (i = 0; i < e1->succ_flags.length (); ++i)
553 if (e1->succ_flags[i] != e1->succ_flags[i])
554 return 0;
557 first1 = bitmap_first_set_bit (e1->bbs);
558 first2 = bitmap_first_set_bit (e2->bbs);
560 bb1 = BASIC_BLOCK (first1);
561 bb2 = BASIC_BLOCK (first2);
563 if (BB_SIZE (bb1) != BB_SIZE (bb2))
564 return 0;
566 gsi1 = gsi_start_nondebug_bb (bb1);
567 gsi2 = gsi_start_nondebug_bb (bb2);
568 gsi_advance_fw_nondebug_nonlocal (&gsi1);
569 gsi_advance_fw_nondebug_nonlocal (&gsi2);
570 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
572 s1 = gsi_stmt (gsi1);
573 s2 = gsi_stmt (gsi2);
574 if (gimple_code (s1) != gimple_code (s2))
575 return 0;
576 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
577 return 0;
578 gsi_next_nondebug (&gsi1);
579 gsi_next_nondebug (&gsi2);
580 gsi_advance_fw_nondebug_nonlocal (&gsi1);
581 gsi_advance_fw_nondebug_nonlocal (&gsi2);
584 return 1;
587 /* Alloc and init a new SAME_SUCC. */
589 static same_succ
590 same_succ_alloc (void)
592 same_succ same = XNEW (struct same_succ_def);
594 same->bbs = BITMAP_ALLOC (NULL);
595 same->succs = BITMAP_ALLOC (NULL);
596 same->inverse = BITMAP_ALLOC (NULL);
597 same->succ_flags.create (10);
598 same->in_worklist = false;
600 return same;
603 /* Delete same_succ E. */
605 void
606 same_succ_def::remove (same_succ e)
608 BITMAP_FREE (e->bbs);
609 BITMAP_FREE (e->succs);
610 BITMAP_FREE (e->inverse);
611 e->succ_flags.release ();
613 XDELETE (e);
616 /* Reset same_succ SAME. */
618 static void
619 same_succ_reset (same_succ same)
621 bitmap_clear (same->bbs);
622 bitmap_clear (same->succs);
623 bitmap_clear (same->inverse);
624 same->succ_flags.truncate (0);
627 static hash_table <same_succ_def> same_succ_htab;
629 /* Array that is used to store the edge flags for a successor. */
631 static int *same_succ_edge_flags;
633 /* Bitmap that is used to mark bbs that are recently deleted. */
635 static bitmap deleted_bbs;
637 /* Bitmap that is used to mark predecessors of bbs that are
638 deleted. */
640 static bitmap deleted_bb_preds;
642 /* Prints same_succ_htab to stderr. */
644 extern void debug_same_succ (void);
645 DEBUG_FUNCTION void
646 debug_same_succ ( void)
648 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
652 /* Vector of bbs to process. */
654 static vec<same_succ> worklist;
656 /* Prints worklist to FILE. */
658 static void
659 print_worklist (FILE *file)
661 unsigned int i;
662 for (i = 0; i < worklist.length (); ++i)
663 same_succ_print (file, worklist[i]);
666 /* Adds SAME to worklist. */
668 static void
669 add_to_worklist (same_succ same)
671 if (same->in_worklist)
672 return;
674 if (bitmap_count_bits (same->bbs) < 2)
675 return;
677 same->in_worklist = true;
678 worklist.safe_push (same);
681 /* Add BB to same_succ_htab. */
683 static void
684 find_same_succ_bb (basic_block bb, same_succ *same_p)
686 unsigned int j;
687 bitmap_iterator bj;
688 same_succ same = *same_p;
689 same_succ *slot;
690 edge_iterator ei;
691 edge e;
693 if (bb == NULL)
694 return;
695 bitmap_set_bit (same->bbs, bb->index);
696 FOR_EACH_EDGE (e, ei, bb->succs)
698 int index = e->dest->index;
699 bitmap_set_bit (same->succs, index);
700 same_succ_edge_flags[index] = e->flags;
702 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
703 same->succ_flags.safe_push (same_succ_edge_flags[j]);
705 same->hashval = same_succ_hash (same);
707 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
708 if (*slot == NULL)
710 *slot = same;
711 BB_SAME_SUCC (bb) = same;
712 add_to_worklist (same);
713 *same_p = NULL;
715 else
717 bitmap_set_bit ((*slot)->bbs, bb->index);
718 BB_SAME_SUCC (bb) = *slot;
719 add_to_worklist (*slot);
720 if (inverse_flags (same, *slot))
721 bitmap_set_bit ((*slot)->inverse, bb->index);
722 same_succ_reset (same);
726 /* Find bbs with same successors. */
728 static void
729 find_same_succ (void)
731 same_succ same = same_succ_alloc ();
732 basic_block bb;
734 FOR_EACH_BB (bb)
736 find_same_succ_bb (bb, &same);
737 if (same == NULL)
738 same = same_succ_alloc ();
741 same_succ_def::remove (same);
744 /* Initializes worklist administration. */
746 static void
747 init_worklist (void)
749 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
750 same_succ_htab.create (n_basic_blocks);
751 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
752 deleted_bbs = BITMAP_ALLOC (NULL);
753 deleted_bb_preds = BITMAP_ALLOC (NULL);
754 worklist.create (n_basic_blocks);
755 find_same_succ ();
757 if (dump_file && (dump_flags & TDF_DETAILS))
759 fprintf (dump_file, "initial worklist:\n");
760 print_worklist (dump_file);
764 /* Deletes worklist administration. */
766 static void
767 delete_worklist (void)
769 free_aux_for_blocks ();
770 same_succ_htab.dispose ();
771 XDELETEVEC (same_succ_edge_flags);
772 same_succ_edge_flags = NULL;
773 BITMAP_FREE (deleted_bbs);
774 BITMAP_FREE (deleted_bb_preds);
775 worklist.release ();
778 /* Mark BB as deleted, and mark its predecessors. */
780 static void
781 mark_basic_block_deleted (basic_block bb)
783 edge e;
784 edge_iterator ei;
786 bitmap_set_bit (deleted_bbs, bb->index);
788 FOR_EACH_EDGE (e, ei, bb->preds)
789 bitmap_set_bit (deleted_bb_preds, e->src->index);
792 /* Removes BB from its corresponding same_succ. */
794 static void
795 same_succ_flush_bb (basic_block bb)
797 same_succ same = BB_SAME_SUCC (bb);
798 BB_SAME_SUCC (bb) = NULL;
799 if (bitmap_single_bit_set_p (same->bbs))
800 same_succ_htab.remove_elt_with_hash (same, same->hashval);
801 else
802 bitmap_clear_bit (same->bbs, bb->index);
805 /* Removes all bbs in BBS from their corresponding same_succ. */
807 static void
808 same_succ_flush_bbs (bitmap bbs)
810 unsigned int i;
811 bitmap_iterator bi;
813 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
814 same_succ_flush_bb (BASIC_BLOCK (i));
817 /* Release the last vdef in BB, either normal or phi result. */
819 static void
820 release_last_vdef (basic_block bb)
822 gimple_stmt_iterator i;
824 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
826 gimple stmt = gsi_stmt (i);
827 if (gimple_vdef (stmt) == NULL_TREE)
828 continue;
830 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
831 return;
834 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
836 gimple phi = gsi_stmt (i);
837 tree res = gimple_phi_result (phi);
839 if (!virtual_operand_p (res))
840 continue;
842 mark_virtual_phi_result_for_renaming (phi);
843 return;
848 /* For deleted_bb_preds, find bbs with same successors. */
850 static void
851 update_worklist (void)
853 unsigned int i;
854 bitmap_iterator bi;
855 basic_block bb;
856 same_succ same;
858 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
859 bitmap_clear (deleted_bbs);
861 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
862 same_succ_flush_bbs (deleted_bb_preds);
864 same = same_succ_alloc ();
865 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
867 bb = BASIC_BLOCK (i);
868 gcc_assert (bb != NULL);
869 find_same_succ_bb (bb, &same);
870 if (same == NULL)
871 same = same_succ_alloc ();
873 same_succ_def::remove (same);
874 bitmap_clear (deleted_bb_preds);
877 /* Prints cluster C to FILE. */
879 static void
880 print_cluster (FILE *file, bb_cluster c)
882 if (c == NULL)
883 return;
884 bitmap_print (file, c->bbs, "bbs:", "\n");
885 bitmap_print (file, c->preds, "preds:", "\n");
888 /* Prints cluster C to stderr. */
890 extern void debug_cluster (bb_cluster);
891 DEBUG_FUNCTION void
892 debug_cluster (bb_cluster c)
894 print_cluster (stderr, c);
897 /* Update C->rep_bb, given that BB is added to the cluster. */
899 static void
900 update_rep_bb (bb_cluster c, basic_block bb)
902 /* Initial. */
903 if (c->rep_bb == NULL)
905 c->rep_bb = bb;
906 return;
909 /* Current needs no deps, keep it. */
910 if (BB_DEP_BB (c->rep_bb) == NULL)
911 return;
913 /* Bb needs no deps, change rep_bb. */
914 if (BB_DEP_BB (bb) == NULL)
916 c->rep_bb = bb;
917 return;
920 /* Bb needs last deps earlier than current, change rep_bb. A potential
921 problem with this, is that the first deps might also be earlier, which
922 would mean we prefer longer lifetimes for the deps. To be able to check
923 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
924 BB_DEP_BB, which is really BB_LAST_DEP_BB.
925 The benefit of choosing the bb with last deps earlier, is that it can
926 potentially be used as replacement for more bbs. */
927 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
928 c->rep_bb = bb;
931 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
933 static void
934 add_bb_to_cluster (bb_cluster c, basic_block bb)
936 edge e;
937 edge_iterator ei;
939 bitmap_set_bit (c->bbs, bb->index);
941 FOR_EACH_EDGE (e, ei, bb->preds)
942 bitmap_set_bit (c->preds, e->src->index);
944 update_rep_bb (c, bb);
947 /* Allocate and init new cluster. */
949 static bb_cluster
950 new_cluster (void)
952 bb_cluster c;
953 c = XCNEW (struct bb_cluster_def);
954 c->bbs = BITMAP_ALLOC (NULL);
955 c->preds = BITMAP_ALLOC (NULL);
956 c->rep_bb = NULL;
957 return c;
960 /* Delete clusters. */
962 static void
963 delete_cluster (bb_cluster c)
965 if (c == NULL)
966 return;
967 BITMAP_FREE (c->bbs);
968 BITMAP_FREE (c->preds);
969 XDELETE (c);
973 /* Array that contains all clusters. */
975 static vec<bb_cluster> all_clusters;
977 /* Allocate all cluster vectors. */
979 static void
980 alloc_cluster_vectors (void)
982 all_clusters.create (n_basic_blocks);
985 /* Reset all cluster vectors. */
987 static void
988 reset_cluster_vectors (void)
990 unsigned int i;
991 basic_block bb;
992 for (i = 0; i < all_clusters.length (); ++i)
993 delete_cluster (all_clusters[i]);
994 all_clusters.truncate (0);
995 FOR_EACH_BB (bb)
996 BB_CLUSTER (bb) = NULL;
999 /* Delete all cluster vectors. */
1001 static void
1002 delete_cluster_vectors (void)
1004 unsigned int i;
1005 for (i = 0; i < all_clusters.length (); ++i)
1006 delete_cluster (all_clusters[i]);
1007 all_clusters.release ();
1010 /* Merge cluster C2 into C1. */
1012 static void
1013 merge_clusters (bb_cluster c1, bb_cluster c2)
1015 bitmap_ior_into (c1->bbs, c2->bbs);
1016 bitmap_ior_into (c1->preds, c2->preds);
1019 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1020 all_clusters, or merge c with existing cluster. */
1022 static void
1023 set_cluster (basic_block bb1, basic_block bb2)
1025 basic_block merge_bb, other_bb;
1026 bb_cluster merge, old, c;
1028 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1030 c = new_cluster ();
1031 add_bb_to_cluster (c, bb1);
1032 add_bb_to_cluster (c, bb2);
1033 BB_CLUSTER (bb1) = c;
1034 BB_CLUSTER (bb2) = c;
1035 c->index = all_clusters.length ();
1036 all_clusters.safe_push (c);
1038 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1040 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1041 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1042 merge = BB_CLUSTER (merge_bb);
1043 add_bb_to_cluster (merge, other_bb);
1044 BB_CLUSTER (other_bb) = merge;
1046 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1048 unsigned int i;
1049 bitmap_iterator bi;
1051 old = BB_CLUSTER (bb2);
1052 merge = BB_CLUSTER (bb1);
1053 merge_clusters (merge, old);
1054 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1055 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
1056 all_clusters[old->index] = NULL;
1057 update_rep_bb (merge, old->rep_bb);
1058 delete_cluster (old);
1060 else
1061 gcc_unreachable ();
1064 /* Return true if gimple operands T1 and T2 have the same value. */
1066 static bool
1067 gimple_operand_equal_value_p (tree t1, tree t2)
1069 if (t1 == t2)
1070 return true;
1072 if (t1 == NULL_TREE
1073 || t2 == NULL_TREE)
1074 return false;
1076 if (operand_equal_p (t1, t2, 0))
1077 return true;
1079 return gvn_uses_equal (t1, t2);
1082 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1083 gimple_bb (s2) are members of SAME_SUCC. */
1085 static bool
1086 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1088 unsigned int i;
1089 tree lhs1, lhs2;
1090 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1091 tree t1, t2;
1092 bool equal, inv_cond;
1093 enum tree_code code1, code2;
1095 if (gimple_code (s1) != gimple_code (s2))
1096 return false;
1098 switch (gimple_code (s1))
1100 case GIMPLE_CALL:
1101 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1102 return false;
1103 if (!gimple_call_same_target_p (s1, s2))
1104 return false;
1106 /* Eventually, we'll significantly complicate the CFG by adding
1107 back edges to properly model the effects of transaction restart.
1108 For the bulk of optimization this does not matter, but what we
1109 cannot recover from is tail merging blocks between two separate
1110 transactions. Avoid that by making commit not match. */
1111 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
1112 return false;
1114 equal = true;
1115 for (i = 0; i < gimple_call_num_args (s1); ++i)
1117 t1 = gimple_call_arg (s1, i);
1118 t2 = gimple_call_arg (s2, i);
1119 if (operand_equal_p (t1, t2, 0))
1120 continue;
1121 if (gvn_uses_equal (t1, t2))
1122 continue;
1123 equal = false;
1124 break;
1126 if (!equal)
1127 return false;
1129 lhs1 = gimple_get_lhs (s1);
1130 lhs2 = gimple_get_lhs (s2);
1131 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1132 return true;
1133 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1134 return false;
1135 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1136 return vn_valueize (lhs1) == vn_valueize (lhs2);
1137 return operand_equal_p (lhs1, lhs2, 0);
1139 case GIMPLE_ASSIGN:
1140 lhs1 = gimple_get_lhs (s1);
1141 lhs2 = gimple_get_lhs (s2);
1142 if (TREE_CODE (lhs1) != SSA_NAME
1143 && TREE_CODE (lhs2) != SSA_NAME)
1144 return (operand_equal_p (lhs1, lhs2, 0)
1145 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1146 gimple_assign_rhs1 (s2)));
1147 else if (TREE_CODE (lhs1) == SSA_NAME
1148 && TREE_CODE (lhs2) == SSA_NAME)
1149 return operand_equal_p (gimple_assign_rhs1 (s1),
1150 gimple_assign_rhs1 (s2), 0);
1151 return false;
1153 case GIMPLE_COND:
1154 t1 = gimple_cond_lhs (s1);
1155 t2 = gimple_cond_lhs (s2);
1156 if (!operand_equal_p (t1, t2, 0)
1157 && !gvn_uses_equal (t1, t2))
1158 return false;
1160 t1 = gimple_cond_rhs (s1);
1161 t2 = gimple_cond_rhs (s2);
1162 if (!operand_equal_p (t1, t2, 0)
1163 && !gvn_uses_equal (t1, t2))
1164 return false;
1166 code1 = gimple_expr_code (s1);
1167 code2 = gimple_expr_code (s2);
1168 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1169 != bitmap_bit_p (same_succ->inverse, bb2->index));
1170 if (inv_cond)
1172 bool honor_nans
1173 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1174 code2 = invert_tree_comparison (code2, honor_nans);
1176 return code1 == code2;
1178 default:
1179 return false;
1183 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1184 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1185 processed statements. */
1187 static void
1188 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1189 bool *vuse_escaped)
1191 gimple stmt;
1192 tree lvuse;
1194 while (true)
1196 if (gsi_end_p (*gsi))
1197 return;
1198 stmt = gsi_stmt (*gsi);
1200 lvuse = gimple_vuse (stmt);
1201 if (lvuse != NULL_TREE)
1203 *vuse = lvuse;
1204 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1205 *vuse_escaped = true;
1208 if (!stmt_local_def (stmt))
1209 return;
1210 gsi_prev_nondebug (gsi);
1214 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1215 clusters them. */
1217 static void
1218 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1220 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1221 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1222 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1223 bool vuse_escaped = false;
1225 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1226 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1228 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1230 gimple stmt1 = gsi_stmt (gsi1);
1231 gimple stmt2 = gsi_stmt (gsi2);
1233 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1234 return;
1236 // We cannot tail-merge the builtins that end transactions.
1237 // ??? The alternative being unsharing of BBs in the tm_init pass.
1238 if (flag_tm
1239 && is_gimple_call (stmt1)
1240 && (gimple_call_flags (stmt1) & ECF_TM_BUILTIN)
1241 && is_tm_ending_fndecl (gimple_call_fndecl (stmt1)))
1242 return;
1244 gsi_prev_nondebug (&gsi1);
1245 gsi_prev_nondebug (&gsi2);
1246 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1247 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1250 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1251 return;
1253 /* If the incoming vuses are not the same, and the vuse escaped into an
1254 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1255 which potentially means the semantics of one of the blocks will be changed.
1256 TODO: make this check more precise. */
1257 if (vuse_escaped && vuse1 != vuse2)
1258 return;
1260 if (dump_file)
1261 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1262 bb1->index, bb2->index);
1264 set_cluster (bb1, bb2);
1267 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1268 E2 are equal. */
1270 static bool
1271 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1273 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1274 gimple_stmt_iterator gsi;
1276 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1278 gimple phi = gsi_stmt (gsi);
1279 tree lhs = gimple_phi_result (phi);
1280 tree val1 = gimple_phi_arg_def (phi, n1);
1281 tree val2 = gimple_phi_arg_def (phi, n2);
1283 if (virtual_operand_p (lhs))
1284 continue;
1286 if (operand_equal_for_phi_arg_p (val1, val2))
1287 continue;
1288 if (gvn_uses_equal (val1, val2))
1289 continue;
1291 return false;
1294 return true;
1297 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1298 phi alternatives for BB1 and BB2 are equal. */
1300 static bool
1301 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1303 unsigned int s;
1304 bitmap_iterator bs;
1305 edge e1, e2;
1306 basic_block succ;
1308 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1310 succ = BASIC_BLOCK (s);
1311 e1 = find_edge (bb1, succ);
1312 e2 = find_edge (bb2, succ);
1313 if (e1->flags & EDGE_COMPLEX
1314 || e2->flags & EDGE_COMPLEX)
1315 return false;
1317 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1318 the same value. */
1319 if (!same_phi_alternatives_1 (succ, e1, e2))
1320 return false;
1323 return true;
1326 /* Return true if BB has non-vop phis. */
1328 static bool
1329 bb_has_non_vop_phi (basic_block bb)
1331 gimple_seq phis = phi_nodes (bb);
1332 gimple phi;
1334 if (phis == NULL)
1335 return false;
1337 if (!gimple_seq_singleton_p (phis))
1338 return true;
1340 phi = gimple_seq_first_stmt (phis);
1341 return !virtual_operand_p (gimple_phi_result (phi));
1344 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1345 invariant that uses in FROM are dominates by their defs. */
1347 static bool
1348 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1350 basic_block cd, dep_bb = BB_DEP_BB (to);
1351 edge_iterator ei;
1352 edge e;
1353 bitmap from_preds = BITMAP_ALLOC (NULL);
1355 if (dep_bb == NULL)
1356 return true;
1358 FOR_EACH_EDGE (e, ei, from->preds)
1359 bitmap_set_bit (from_preds, e->src->index);
1360 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1361 BITMAP_FREE (from_preds);
1363 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1366 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1367 replacement bb) and vice versa maintains the invariant that uses in the
1368 replacement are dominates by their defs. */
1370 static bool
1371 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1373 if (BB_CLUSTER (bb1) != NULL)
1374 bb1 = BB_CLUSTER (bb1)->rep_bb;
1376 if (BB_CLUSTER (bb2) != NULL)
1377 bb2 = BB_CLUSTER (bb2)->rep_bb;
1379 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1380 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1383 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1385 static void
1386 find_clusters_1 (same_succ same_succ)
1388 basic_block bb1, bb2;
1389 unsigned int i, j;
1390 bitmap_iterator bi, bj;
1391 int nr_comparisons;
1392 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1394 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1396 bb1 = BASIC_BLOCK (i);
1398 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1399 phi-nodes in bb1 and bb2, with the same alternatives for the same
1400 preds. */
1401 if (bb_has_non_vop_phi (bb1))
1402 continue;
1404 nr_comparisons = 0;
1405 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1407 bb2 = BASIC_BLOCK (j);
1409 if (bb_has_non_vop_phi (bb2))
1410 continue;
1412 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1413 continue;
1415 /* Limit quadratic behaviour. */
1416 nr_comparisons++;
1417 if (nr_comparisons > max_comparisons)
1418 break;
1420 /* This is a conservative dependency check. We could test more
1421 precise for allowed replacement direction. */
1422 if (!deps_ok_for_redirect (bb1, bb2))
1423 continue;
1425 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1426 continue;
1428 find_duplicate (same_succ, bb1, bb2);
1433 /* Find clusters of bbs which can be merged. */
1435 static void
1436 find_clusters (void)
1438 same_succ same;
1440 while (!worklist.is_empty ())
1442 same = worklist.pop ();
1443 same->in_worklist = false;
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1446 fprintf (dump_file, "processing worklist entry\n");
1447 same_succ_print (dump_file, same);
1449 find_clusters_1 (same);
1453 /* Returns the vop phi of BB, if any. */
1455 static gimple
1456 vop_phi (basic_block bb)
1458 gimple stmt;
1459 gimple_stmt_iterator gsi;
1460 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1462 stmt = gsi_stmt (gsi);
1463 if (! virtual_operand_p (gimple_phi_result (stmt)))
1464 continue;
1465 return stmt;
1467 return NULL;
/* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed.
   BB2's virtual phi (if any) gains one argument per redirected edge, and
   BB2's profile counters absorb BB1's.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  unsigned int i;
  gimple bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  Iterate downwards because
     redirecting an edge removes it from bb1->preds.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      /* Without a virtual phi in bb2 there is nothing to extend.  */
      if (bb2_phi == NULL)
	continue;

      /* The phi might have run out of capacity when the redirect added an
	 argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
		   pred_edge, UNKNOWN_LOCATION);
    }

  /* Merge profile information, capping the frequency at its maximum.  */
  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;

  bb2->count += bb1->count;

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}
/* Bbs for which update_debug_stmt needs to be called; maintained by
   apply_clusters, consumed by update_debug_stmts.  */

static bitmap update_bbs;
1519 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1520 number of bbs removed. */
1522 static int
1523 apply_clusters (void)
1525 basic_block bb1, bb2;
1526 bb_cluster c;
1527 unsigned int i, j;
1528 bitmap_iterator bj;
1529 int nr_bbs_removed = 0;
1531 for (i = 0; i < all_clusters.length (); ++i)
1533 c = all_clusters[i];
1534 if (c == NULL)
1535 continue;
1537 bb2 = c->rep_bb;
1538 bitmap_set_bit (update_bbs, bb2->index);
1540 bitmap_clear_bit (c->bbs, bb2->index);
1541 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1543 bb1 = BASIC_BLOCK (j);
1544 bitmap_clear_bit (update_bbs, bb1->index);
1546 replace_block_by (bb1, bb2);
1547 nr_bbs_removed++;
1551 return nr_bbs_removed;
1554 /* Resets debug statement STMT if it has uses that are not dominated by their
1555 defs. */
1557 static void
1558 update_debug_stmt (gimple stmt)
1560 use_operand_p use_p;
1561 ssa_op_iter oi;
1562 basic_block bbdef, bbuse;
1563 gimple def_stmt;
1564 tree name;
1566 if (!gimple_debug_bind_p (stmt))
1567 return;
1569 bbuse = gimple_bb (stmt);
1570 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1572 name = USE_FROM_PTR (use_p);
1573 gcc_assert (TREE_CODE (name) == SSA_NAME);
1575 def_stmt = SSA_NAME_DEF_STMT (name);
1576 gcc_assert (def_stmt != NULL);
1578 bbdef = gimple_bb (def_stmt);
1579 if (bbdef == NULL || bbuse == bbdef
1580 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1581 continue;
1583 gimple_debug_bind_reset_value (stmt);
1584 update_stmt (stmt);
1588 /* Resets all debug statements that have uses that are not
1589 dominated by their defs. */
1591 static void
1592 update_debug_stmts (void)
1594 basic_block bb;
1595 bitmap_iterator bi;
1596 unsigned int i;
1598 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1600 gimple stmt;
1601 gimple_stmt_iterator gsi;
1603 bb = BASIC_BLOCK (i);
1604 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1606 stmt = gsi_stmt (gsi);
1607 if (!is_gimple_debug (stmt))
1608 continue;
1609 update_debug_stmt (stmt);
/* Runs tail merge optimization.  Iterates find_clusters/apply_clusters up to
   PARAM_MAX_TAIL_MERGE_ITERATIONS times, returns TODO with verification and
   virtual-operand-renaming flags added when any bb was removed.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  /* Pass disabled, or explicitly limited to zero iterations.  */
  if (!flag_tree_tail_merge || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  while (!worklist.is_empty ())
    {
      /* Lazily allocate the per-run data on the first iteration; reset it
	 on subsequent ones.  */
      if (!loop_entered)
	{
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      /* Stop when no more clusters are found or merging made no progress.  */
      if (all_clusters.is_empty ())
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
	break;

      /* apply_clusters changed the cfg; dominance info is stale.  */
      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      /* Recompute dominators and refill the worklist for another round.  */
      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab.collisions ());

  if (nr_bbs_removed_total > 0)
    {
      /* Debug binds may now refer to defs that no longer dominate them.  */
      if (MAY_HAVE_DEBUG_STMTS)
	{
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
	}

      todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  /* The cluster vectors and update_bbs only exist if the loop ran.  */
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}