2012-11-16 François Dumont <fdumont@gcc.gnu.org>
[official-gcc.git] / gcc / tree-ssa-tail-merge.c
blob660b68c10b2d4ad64123dd91d03cad15aa726b40
1 /* Tail merging for gimple.
2 Copyright (C) 2011, 2012 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Pass overview.
24 MOTIVATIONAL EXAMPLE
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
110 CONTEXT
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
127 For efficient implementation, we would like to value number the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
157 IMPLEMENTATION
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
167 LIMITATIONS/TODO
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis.  We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
179 SWITCHES
181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
183 #include "config.h"
184 #include "system.h"
185 #include "coretypes.h"
186 #include "tm.h"
187 #include "tree.h"
188 #include "tm_p.h"
189 #include "basic-block.h"
190 #include "flags.h"
191 #include "function.h"
192 #include "tree-flow.h"
193 #include "bitmap.h"
194 #include "tree-ssa-alias.h"
195 #include "params.h"
196 #include "hash-table.h"
197 #include "gimple-pretty-print.h"
198 #include "tree-ssa-sccvn.h"
199 #include "tree-dump.h"
201 /* ??? This currently runs as part of tree-ssa-pre. Why is this not
202 a stand-alone GIMPLE pass? */
203 #include "tree-pass.h"
205 /* Describes a group of bbs with the same successors. The successor bbs are
206 cached in succs, and the successor edge flags are cached in succ_flags.
207 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
208 it's marked in inverse.
209 Additionally, the hash value for the struct is cached in hashval, and
210 in_worklist indicates whether it's currently part of worklist. */
212 struct same_succ_def
214 /* The bbs that have the same successor bbs. */
215 bitmap bbs;
216 /* The successor bbs. */
217 bitmap succs;
218 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
219 bb. */
220 bitmap inverse;
221 /* The edge flags for each of the successor bbs. */
222 VEC (int, heap) *succ_flags;
223 /* Indicates whether the struct is currently in the worklist. */
224 bool in_worklist;
225 /* The hash value of the struct. */
226 hashval_t hashval;
228 /* hash_table support. */
229 typedef same_succ_def value_type;
230 typedef same_succ_def compare_type;
231 static inline hashval_t hash (const value_type *);
232 static int equal (const value_type *, const compare_type *);
233 static void remove (value_type *);
235 typedef struct same_succ_def *same_succ;
236 typedef const struct same_succ_def *const_same_succ;
238 /* hash routine for hash_table support, returns hashval of E. */
240 inline hashval_t
241 same_succ_def::hash (const value_type *e)
243 return e->hashval;
246 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
248 struct bb_cluster_def
250 /* The bbs in the cluster. */
251 bitmap bbs;
252 /* The preds of the bbs in the cluster. */
253 bitmap preds;
254 /* Index in all_clusters vector. */
255 int index;
256 /* The bb to replace the cluster with. */
257 basic_block rep_bb;
259 typedef struct bb_cluster_def *bb_cluster;
260 typedef const struct bb_cluster_def *const_bb_cluster;
262 /* Per bb-info. */
264 struct aux_bb_info
266 /* The number of non-debug statements in the bb. */
267 int size;
268 /* The same_succ that this bb is a member of. */
269 same_succ bb_same_succ;
270 /* The cluster that this bb is a member of. */
271 bb_cluster cluster;
272 /* The vop state at the exit of a bb. This is shortlived data, used to
273 communicate data between update_block_by and update_vuses. */
274 tree vop_at_exit;
275 /* The bb that either contains or is dominated by the dependencies of the
276 bb. */
277 basic_block dep_bb;
/* Macros to access the fields of struct aux_bb_info.  BB is parenthesized
   in the expansion so that any pointer-valued expression can be passed
   without precedence surprises.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)(bb)->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)(bb)->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)(bb)->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)(bb)->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)(bb)->aux)->dep_bb)
288 /* Returns true if the only effect a statement STMT has, is to define locally
289 used SSA_NAMEs. */
291 static bool
292 stmt_local_def (gimple stmt)
294 basic_block bb, def_bb;
295 imm_use_iterator iter;
296 use_operand_p use_p;
297 tree val;
298 def_operand_p def_p;
300 if (gimple_has_side_effects (stmt))
301 return false;
303 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
304 if (def_p == NULL)
305 return false;
307 val = DEF_FROM_PTR (def_p);
308 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
309 return false;
311 def_bb = gimple_bb (stmt);
313 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
315 if (is_gimple_debug (USE_STMT (use_p)))
316 continue;
317 bb = gimple_bb (USE_STMT (use_p));
318 if (bb == def_bb)
319 continue;
321 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
322 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
323 continue;
325 return false;
328 return true;
331 /* Let GSI skip forwards over local defs. */
333 static void
334 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
336 gimple stmt;
338 while (true)
340 if (gsi_end_p (*gsi))
341 return;
342 stmt = gsi_stmt (*gsi);
343 if (!stmt_local_def (stmt))
344 return;
345 gsi_next_nondebug (gsi);
349 /* VAL1 and VAL2 are either:
350 - uses in BB1 and BB2, or
351 - phi alternatives for BB1 and BB2.
352 Return true if the uses have the same gvn value. */
354 static bool
355 gvn_uses_equal (tree val1, tree val2)
357 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
359 if (val1 == val2)
360 return true;
362 if (vn_valueize (val1) != vn_valueize (val2))
363 return false;
365 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
366 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
369 /* Prints E to FILE. */
371 static void
372 same_succ_print (FILE *file, const same_succ e)
374 unsigned int i;
375 bitmap_print (file, e->bbs, "bbs:", "\n");
376 bitmap_print (file, e->succs, "succs:", "\n");
377 bitmap_print (file, e->inverse, "inverse:", "\n");
378 fprintf (file, "flags:");
379 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
380 fprintf (file, " %x", VEC_index (int, e->succ_flags, i));
381 fprintf (file, "\n");
384 /* Prints same_succ VE to VFILE. */
386 inline int
387 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
389 const same_succ e = *pe;
390 same_succ_print (file, e);
391 return 1;
394 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
396 static void
397 update_dep_bb (basic_block use_bb, tree val)
399 basic_block dep_bb;
401 /* Not a dep. */
402 if (TREE_CODE (val) != SSA_NAME)
403 return;
405 /* Skip use of global def. */
406 if (SSA_NAME_IS_DEFAULT_DEF (val))
407 return;
409 /* Skip use of local def. */
410 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
411 if (dep_bb == use_bb)
412 return;
414 if (BB_DEP_BB (use_bb) == NULL
415 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
416 BB_DEP_BB (use_bb) = dep_bb;
419 /* Update BB_DEP_BB, given the dependencies in STMT. */
421 static void
422 stmt_update_dep_bb (gimple stmt)
424 ssa_op_iter iter;
425 use_operand_p use;
427 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
428 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
431 /* Calculates hash value for same_succ VE. */
433 static hashval_t
434 same_succ_hash (const_same_succ e)
436 hashval_t hashval = bitmap_hash (e->succs);
437 int flags;
438 unsigned int i;
439 unsigned int first = bitmap_first_set_bit (e->bbs);
440 basic_block bb = BASIC_BLOCK (first);
441 int size = 0;
442 gimple_stmt_iterator gsi;
443 gimple stmt;
444 tree arg;
445 unsigned int s;
446 bitmap_iterator bs;
448 for (gsi = gsi_start_nondebug_bb (bb);
449 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
451 stmt = gsi_stmt (gsi);
452 stmt_update_dep_bb (stmt);
453 if (stmt_local_def (stmt))
454 continue;
455 size++;
457 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
458 if (is_gimple_assign (stmt))
459 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
460 hashval);
461 if (!is_gimple_call (stmt))
462 continue;
463 if (gimple_call_internal_p (stmt))
464 hashval = iterative_hash_hashval_t
465 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
466 else
467 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
468 for (i = 0; i < gimple_call_num_args (stmt); i++)
470 arg = gimple_call_arg (stmt, i);
471 arg = vn_valueize (arg);
472 hashval = iterative_hash_expr (arg, hashval);
476 hashval = iterative_hash_hashval_t (size, hashval);
477 BB_SIZE (bb) = size;
479 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
481 flags = VEC_index (int, e->succ_flags, i);
482 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
483 hashval = iterative_hash_hashval_t (flags, hashval);
486 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
488 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
489 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
490 gsi_next (&gsi))
492 gimple phi = gsi_stmt (gsi);
493 tree lhs = gimple_phi_result (phi);
494 tree val = gimple_phi_arg_def (phi, n);
496 if (virtual_operand_p (lhs))
497 continue;
498 update_dep_bb (bb, val);
502 return hashval;
505 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
506 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
507 the other edge flags. */
509 static bool
510 inverse_flags (const_same_succ e1, const_same_succ e2)
512 int f1a, f1b, f2a, f2b;
513 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
515 if (VEC_length (int, e1->succ_flags) != 2)
516 return false;
518 f1a = VEC_index (int, e1->succ_flags, 0);
519 f1b = VEC_index (int, e1->succ_flags, 1);
520 f2a = VEC_index (int, e2->succ_flags, 0);
521 f2b = VEC_index (int, e2->succ_flags, 1);
523 if (f1a == f2a && f1b == f2b)
524 return false;
526 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
529 /* Compares SAME_SUCCs E1 and E2. */
532 same_succ_def::equal (const value_type *e1, const compare_type *e2)
534 unsigned int i, first1, first2;
535 gimple_stmt_iterator gsi1, gsi2;
536 gimple s1, s2;
537 basic_block bb1, bb2;
539 if (e1->hashval != e2->hashval)
540 return 0;
542 if (VEC_length (int, e1->succ_flags) != VEC_length (int, e2->succ_flags))
543 return 0;
545 if (!bitmap_equal_p (e1->succs, e2->succs))
546 return 0;
548 if (!inverse_flags (e1, e2))
550 for (i = 0; i < VEC_length (int, e1->succ_flags); ++i)
551 if (VEC_index (int, e1->succ_flags, i)
552 != VEC_index (int, e1->succ_flags, i))
553 return 0;
556 first1 = bitmap_first_set_bit (e1->bbs);
557 first2 = bitmap_first_set_bit (e2->bbs);
559 bb1 = BASIC_BLOCK (first1);
560 bb2 = BASIC_BLOCK (first2);
562 if (BB_SIZE (bb1) != BB_SIZE (bb2))
563 return 0;
565 gsi1 = gsi_start_nondebug_bb (bb1);
566 gsi2 = gsi_start_nondebug_bb (bb2);
567 gsi_advance_fw_nondebug_nonlocal (&gsi1);
568 gsi_advance_fw_nondebug_nonlocal (&gsi2);
569 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
571 s1 = gsi_stmt (gsi1);
572 s2 = gsi_stmt (gsi2);
573 if (gimple_code (s1) != gimple_code (s2))
574 return 0;
575 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
576 return 0;
577 gsi_next_nondebug (&gsi1);
578 gsi_next_nondebug (&gsi2);
579 gsi_advance_fw_nondebug_nonlocal (&gsi1);
580 gsi_advance_fw_nondebug_nonlocal (&gsi2);
583 return 1;
586 /* Alloc and init a new SAME_SUCC. */
588 static same_succ
589 same_succ_alloc (void)
591 same_succ same = XNEW (struct same_succ_def);
593 same->bbs = BITMAP_ALLOC (NULL);
594 same->succs = BITMAP_ALLOC (NULL);
595 same->inverse = BITMAP_ALLOC (NULL);
596 same->succ_flags = VEC_alloc (int, heap, 10);
597 same->in_worklist = false;
599 return same;
602 /* Delete same_succ E. */
604 void
605 same_succ_def::remove (same_succ e)
607 BITMAP_FREE (e->bbs);
608 BITMAP_FREE (e->succs);
609 BITMAP_FREE (e->inverse);
610 VEC_free (int, heap, e->succ_flags);
612 XDELETE (e);
615 /* Reset same_succ SAME. */
617 static void
618 same_succ_reset (same_succ same)
620 bitmap_clear (same->bbs);
621 bitmap_clear (same->succs);
622 bitmap_clear (same->inverse);
623 VEC_truncate (int, same->succ_flags, 0);
626 static hash_table <same_succ_def> same_succ_htab;
628 /* Array that is used to store the edge flags for a successor. */
630 static int *same_succ_edge_flags;
632 /* Bitmap that is used to mark bbs that are recently deleted. */
634 static bitmap deleted_bbs;
636 /* Bitmap that is used to mark predecessors of bbs that are
637 deleted. */
639 static bitmap deleted_bb_preds;
641 /* Prints same_succ_htab to stderr. */
643 extern void debug_same_succ (void);
644 DEBUG_FUNCTION void
645 debug_same_succ ( void)
647 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
650 DEF_VEC_P (same_succ);
651 DEF_VEC_ALLOC_P (same_succ, heap);
653 /* Vector of bbs to process. */
655 static VEC (same_succ, heap) *worklist;
657 /* Prints worklist to FILE. */
659 static void
660 print_worklist (FILE *file)
662 unsigned int i;
663 for (i = 0; i < VEC_length (same_succ, worklist); ++i)
664 same_succ_print (file, VEC_index (same_succ, worklist, i));
667 /* Adds SAME to worklist. */
669 static void
670 add_to_worklist (same_succ same)
672 if (same->in_worklist)
673 return;
675 if (bitmap_count_bits (same->bbs) < 2)
676 return;
678 same->in_worklist = true;
679 VEC_safe_push (same_succ, heap, worklist, same);
682 /* Add BB to same_succ_htab. */
684 static void
685 find_same_succ_bb (basic_block bb, same_succ *same_p)
687 unsigned int j;
688 bitmap_iterator bj;
689 same_succ same = *same_p;
690 same_succ *slot;
691 edge_iterator ei;
692 edge e;
694 if (bb == NULL)
695 return;
696 bitmap_set_bit (same->bbs, bb->index);
697 FOR_EACH_EDGE (e, ei, bb->succs)
699 int index = e->dest->index;
700 bitmap_set_bit (same->succs, index);
701 same_succ_edge_flags[index] = e->flags;
703 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
704 VEC_safe_push (int, heap, same->succ_flags, same_succ_edge_flags[j]);
706 same->hashval = same_succ_hash (same);
708 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
709 if (*slot == NULL)
711 *slot = same;
712 BB_SAME_SUCC (bb) = same;
713 add_to_worklist (same);
714 *same_p = NULL;
716 else
718 bitmap_set_bit ((*slot)->bbs, bb->index);
719 BB_SAME_SUCC (bb) = *slot;
720 add_to_worklist (*slot);
721 if (inverse_flags (same, *slot))
722 bitmap_set_bit ((*slot)->inverse, bb->index);
723 same_succ_reset (same);
727 /* Find bbs with same successors. */
729 static void
730 find_same_succ (void)
732 same_succ same = same_succ_alloc ();
733 basic_block bb;
735 FOR_EACH_BB (bb)
737 find_same_succ_bb (bb, &same);
738 if (same == NULL)
739 same = same_succ_alloc ();
742 same_succ_def::remove (same);
745 /* Initializes worklist administration. */
747 static void
748 init_worklist (void)
750 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
751 same_succ_htab.create (n_basic_blocks);
752 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
753 deleted_bbs = BITMAP_ALLOC (NULL);
754 deleted_bb_preds = BITMAP_ALLOC (NULL);
755 worklist = VEC_alloc (same_succ, heap, n_basic_blocks);
756 find_same_succ ();
758 if (dump_file && (dump_flags & TDF_DETAILS))
760 fprintf (dump_file, "initial worklist:\n");
761 print_worklist (dump_file);
765 /* Deletes worklist administration. */
767 static void
768 delete_worklist (void)
770 free_aux_for_blocks ();
771 same_succ_htab.dispose ();
772 XDELETEVEC (same_succ_edge_flags);
773 same_succ_edge_flags = NULL;
774 BITMAP_FREE (deleted_bbs);
775 BITMAP_FREE (deleted_bb_preds);
776 VEC_free (same_succ, heap, worklist);
779 /* Mark BB as deleted, and mark its predecessors. */
781 static void
782 mark_basic_block_deleted (basic_block bb)
784 edge e;
785 edge_iterator ei;
787 bitmap_set_bit (deleted_bbs, bb->index);
789 FOR_EACH_EDGE (e, ei, bb->preds)
790 bitmap_set_bit (deleted_bb_preds, e->src->index);
793 /* Removes BB from its corresponding same_succ. */
795 static void
796 same_succ_flush_bb (basic_block bb)
798 same_succ same = BB_SAME_SUCC (bb);
799 BB_SAME_SUCC (bb) = NULL;
800 if (bitmap_single_bit_set_p (same->bbs))
801 same_succ_htab.remove_elt_with_hash (same, same->hashval);
802 else
803 bitmap_clear_bit (same->bbs, bb->index);
806 /* Removes all bbs in BBS from their corresponding same_succ. */
808 static void
809 same_succ_flush_bbs (bitmap bbs)
811 unsigned int i;
812 bitmap_iterator bi;
814 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
815 same_succ_flush_bb (BASIC_BLOCK (i));
818 /* Release the last vdef in BB, either normal or phi result. */
820 static void
821 release_last_vdef (basic_block bb)
823 gimple_stmt_iterator i;
825 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
827 gimple stmt = gsi_stmt (i);
828 if (gimple_vdef (stmt) == NULL_TREE)
829 continue;
831 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
832 return;
835 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
837 gimple phi = gsi_stmt (i);
838 tree res = gimple_phi_result (phi);
840 if (!virtual_operand_p (res))
841 continue;
843 mark_virtual_phi_result_for_renaming (phi);
844 return;
849 /* For deleted_bb_preds, find bbs with same successors. */
851 static void
852 update_worklist (void)
854 unsigned int i;
855 bitmap_iterator bi;
856 basic_block bb;
857 same_succ same;
859 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
860 bitmap_clear (deleted_bbs);
862 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
863 same_succ_flush_bbs (deleted_bb_preds);
865 same = same_succ_alloc ();
866 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
868 bb = BASIC_BLOCK (i);
869 gcc_assert (bb != NULL);
870 find_same_succ_bb (bb, &same);
871 if (same == NULL)
872 same = same_succ_alloc ();
874 same_succ_def::remove (same);
875 bitmap_clear (deleted_bb_preds);
878 /* Prints cluster C to FILE. */
880 static void
881 print_cluster (FILE *file, bb_cluster c)
883 if (c == NULL)
884 return;
885 bitmap_print (file, c->bbs, "bbs:", "\n");
886 bitmap_print (file, c->preds, "preds:", "\n");
889 /* Prints cluster C to stderr. */
891 extern void debug_cluster (bb_cluster);
892 DEBUG_FUNCTION void
893 debug_cluster (bb_cluster c)
895 print_cluster (stderr, c);
898 /* Update C->rep_bb, given that BB is added to the cluster. */
900 static void
901 update_rep_bb (bb_cluster c, basic_block bb)
903 /* Initial. */
904 if (c->rep_bb == NULL)
906 c->rep_bb = bb;
907 return;
910 /* Current needs no deps, keep it. */
911 if (BB_DEP_BB (c->rep_bb) == NULL)
912 return;
914 /* Bb needs no deps, change rep_bb. */
915 if (BB_DEP_BB (bb) == NULL)
917 c->rep_bb = bb;
918 return;
921 /* Bb needs last deps earlier than current, change rep_bb. A potential
922 problem with this, is that the first deps might also be earlier, which
923 would mean we prefer longer lifetimes for the deps. To be able to check
924 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
925 BB_DEP_BB, which is really BB_LAST_DEP_BB.
926 The benefit of choosing the bb with last deps earlier, is that it can
927 potentially be used as replacement for more bbs. */
928 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
929 c->rep_bb = bb;
932 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
934 static void
935 add_bb_to_cluster (bb_cluster c, basic_block bb)
937 edge e;
938 edge_iterator ei;
940 bitmap_set_bit (c->bbs, bb->index);
942 FOR_EACH_EDGE (e, ei, bb->preds)
943 bitmap_set_bit (c->preds, e->src->index);
945 update_rep_bb (c, bb);
948 /* Allocate and init new cluster. */
950 static bb_cluster
951 new_cluster (void)
953 bb_cluster c;
954 c = XCNEW (struct bb_cluster_def);
955 c->bbs = BITMAP_ALLOC (NULL);
956 c->preds = BITMAP_ALLOC (NULL);
957 c->rep_bb = NULL;
958 return c;
961 /* Delete clusters. */
963 static void
964 delete_cluster (bb_cluster c)
966 if (c == NULL)
967 return;
968 BITMAP_FREE (c->bbs);
969 BITMAP_FREE (c->preds);
970 XDELETE (c);
973 DEF_VEC_P (bb_cluster);
974 DEF_VEC_ALLOC_P (bb_cluster, heap);
976 /* Array that contains all clusters. */
978 static VEC (bb_cluster, heap) *all_clusters;
980 /* Allocate all cluster vectors. */
982 static void
983 alloc_cluster_vectors (void)
985 all_clusters = VEC_alloc (bb_cluster, heap, n_basic_blocks);
988 /* Reset all cluster vectors. */
990 static void
991 reset_cluster_vectors (void)
993 unsigned int i;
994 basic_block bb;
995 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
996 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
997 VEC_truncate (bb_cluster, all_clusters, 0);
998 FOR_EACH_BB (bb)
999 BB_CLUSTER (bb) = NULL;
1002 /* Delete all cluster vectors. */
1004 static void
1005 delete_cluster_vectors (void)
1007 unsigned int i;
1008 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1009 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
1010 VEC_free (bb_cluster, heap, all_clusters);
1013 /* Merge cluster C2 into C1. */
1015 static void
1016 merge_clusters (bb_cluster c1, bb_cluster c2)
1018 bitmap_ior_into (c1->bbs, c2->bbs);
1019 bitmap_ior_into (c1->preds, c2->preds);
1022 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1023 all_clusters, or merge c with existing cluster. */
1025 static void
1026 set_cluster (basic_block bb1, basic_block bb2)
1028 basic_block merge_bb, other_bb;
1029 bb_cluster merge, old, c;
1031 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1033 c = new_cluster ();
1034 add_bb_to_cluster (c, bb1);
1035 add_bb_to_cluster (c, bb2);
1036 BB_CLUSTER (bb1) = c;
1037 BB_CLUSTER (bb2) = c;
1038 c->index = VEC_length (bb_cluster, all_clusters);
1039 VEC_safe_push (bb_cluster, heap, all_clusters, c);
1041 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1043 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1044 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1045 merge = BB_CLUSTER (merge_bb);
1046 add_bb_to_cluster (merge, other_bb);
1047 BB_CLUSTER (other_bb) = merge;
1049 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1051 unsigned int i;
1052 bitmap_iterator bi;
1054 old = BB_CLUSTER (bb2);
1055 merge = BB_CLUSTER (bb1);
1056 merge_clusters (merge, old);
1057 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1058 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
1059 VEC_replace (bb_cluster, all_clusters, old->index, NULL);
1060 update_rep_bb (merge, old->rep_bb);
1061 delete_cluster (old);
1063 else
1064 gcc_unreachable ();
1067 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1068 gimple_bb (s2) are members of SAME_SUCC. */
1070 static bool
1071 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1073 unsigned int i;
1074 tree lhs1, lhs2;
1075 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1076 tree t1, t2;
1077 bool equal, inv_cond;
1078 enum tree_code code1, code2;
1080 if (gimple_code (s1) != gimple_code (s2))
1081 return false;
1083 switch (gimple_code (s1))
1085 case GIMPLE_CALL:
1086 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1087 return false;
1088 if (!gimple_call_same_target_p (s1, s2))
1089 return false;
1091 /* Eventually, we'll significantly complicate the CFG by adding
1092 back edges to properly model the effects of transaction restart.
1093 For the bulk of optimization this does not matter, but what we
1094 cannot recover from is tail merging blocks between two separate
1095 transactions. Avoid that by making commit not match. */
1096 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
1097 return false;
1099 equal = true;
1100 for (i = 0; i < gimple_call_num_args (s1); ++i)
1102 t1 = gimple_call_arg (s1, i);
1103 t2 = gimple_call_arg (s2, i);
1104 if (operand_equal_p (t1, t2, 0))
1105 continue;
1106 if (gvn_uses_equal (t1, t2))
1107 continue;
1108 equal = false;
1109 break;
1111 if (!equal)
1112 return false;
1114 lhs1 = gimple_get_lhs (s1);
1115 lhs2 = gimple_get_lhs (s2);
1116 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1117 return true;
1118 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1119 return false;
1120 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1121 return vn_valueize (lhs1) == vn_valueize (lhs2);
1122 return operand_equal_p (lhs1, lhs2, 0);
1124 case GIMPLE_ASSIGN:
1125 lhs1 = gimple_get_lhs (s1);
1126 lhs2 = gimple_get_lhs (s2);
1127 if (gimple_vdef (s1))
1129 if (vn_valueize (gimple_vdef (s1)) != vn_valueize (gimple_vdef (s2)))
1130 return false;
1131 if (TREE_CODE (lhs1) != SSA_NAME
1132 && TREE_CODE (lhs2) != SSA_NAME)
1133 return true;
1135 return (TREE_CODE (lhs1) == SSA_NAME
1136 && TREE_CODE (lhs2) == SSA_NAME
1137 && vn_valueize (lhs1) == vn_valueize (lhs2));
1139 case GIMPLE_COND:
1140 t1 = gimple_cond_lhs (s1);
1141 t2 = gimple_cond_lhs (s2);
1142 if (!operand_equal_p (t1, t2, 0)
1143 && !gvn_uses_equal (t1, t2))
1144 return false;
1146 t1 = gimple_cond_rhs (s1);
1147 t2 = gimple_cond_rhs (s2);
1148 if (!operand_equal_p (t1, t2, 0)
1149 && !gvn_uses_equal (t1, t2))
1150 return false;
1152 code1 = gimple_expr_code (s1);
1153 code2 = gimple_expr_code (s2);
1154 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1155 != bitmap_bit_p (same_succ->inverse, bb2->index));
1156 if (inv_cond)
1158 bool honor_nans
1159 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1160 code2 = invert_tree_comparison (code2, honor_nans);
1162 return code1 == code2;
1164 default:
1165 return false;
1169 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1170 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1171 processed statements. */
1173 static void
1174 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1175 bool *vuse_escaped)
1177 gimple stmt;
1178 tree lvuse;
1180 while (true)
1182 if (gsi_end_p (*gsi))
1183 return;
1184 stmt = gsi_stmt (*gsi);
1186 lvuse = gimple_vuse (stmt);
1187 if (lvuse != NULL_TREE)
1189 *vuse = lvuse;
1190 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1191 *vuse_escaped = true;
1194 if (!stmt_local_def (stmt))
1195 return;
1196 gsi_prev_nondebug (gsi);
1200 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1201 clusters them. */
1203 static void
1204 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1206 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1207 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1208 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1209 bool vuse_escaped = false;
1211 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1212 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1214 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1216 gimple stmt1 = gsi_stmt (gsi1);
1217 gimple stmt2 = gsi_stmt (gsi2);
1219 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1220 return;
1222 // We cannot tail-merge the builtins that end transactions.
1223 // ??? The alternative being unsharing of BBs in the tm_init pass.
1224 if (flag_tm
1225 && is_gimple_call (stmt1)
1226 && (gimple_call_flags (stmt1) & ECF_TM_BUILTIN)
1227 && is_tm_ending_fndecl (gimple_call_fndecl (stmt1)))
1228 return;
1230 gsi_prev_nondebug (&gsi1);
1231 gsi_prev_nondebug (&gsi2);
1232 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1233 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1236 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1237 return;
1239 /* If the incoming vuses are not the same, and the vuse escaped into an
1240 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1241 which potentially means the semantics of one of the blocks will be changed.
1242 TODO: make this check more precise. */
1243 if (vuse_escaped && vuse1 != vuse2)
1244 return;
1246 if (dump_file)
1247 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1248 bb1->index, bb2->index);
1250 set_cluster (bb1, bb2);
1253 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1254 E2 are equal. */
1256 static bool
1257 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1259 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1260 gimple_stmt_iterator gsi;
1262 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1264 gimple phi = gsi_stmt (gsi);
1265 tree lhs = gimple_phi_result (phi);
1266 tree val1 = gimple_phi_arg_def (phi, n1);
1267 tree val2 = gimple_phi_arg_def (phi, n2);
1269 if (virtual_operand_p (lhs))
1270 continue;
1272 if (operand_equal_for_phi_arg_p (val1, val2))
1273 continue;
1274 if (gvn_uses_equal (val1, val2))
1275 continue;
1277 return false;
1280 return true;
1283 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1284 phi alternatives for BB1 and BB2 are equal. */
1286 static bool
1287 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1289 unsigned int s;
1290 bitmap_iterator bs;
1291 edge e1, e2;
1292 basic_block succ;
1294 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1296 succ = BASIC_BLOCK (s);
1297 e1 = find_edge (bb1, succ);
1298 e2 = find_edge (bb2, succ);
1299 if (e1->flags & EDGE_COMPLEX
1300 || e2->flags & EDGE_COMPLEX)
1301 return false;
1303 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1304 the same value. */
1305 if (!same_phi_alternatives_1 (succ, e1, e2))
1306 return false;
1309 return true;
1312 /* Return true if BB has non-vop phis. */
1314 static bool
1315 bb_has_non_vop_phi (basic_block bb)
1317 gimple_seq phis = phi_nodes (bb);
1318 gimple phi;
1320 if (phis == NULL)
1321 return false;
1323 if (!gimple_seq_singleton_p (phis))
1324 return true;
1326 phi = gimple_seq_first_stmt (phis);
1327 return !virtual_operand_p (gimple_phi_result (phi));
1330 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1331 invariant that uses in FROM are dominates by their defs. */
1333 static bool
1334 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1336 basic_block cd, dep_bb = BB_DEP_BB (to);
1337 edge_iterator ei;
1338 edge e;
1339 bitmap from_preds = BITMAP_ALLOC (NULL);
1341 if (dep_bb == NULL)
1342 return true;
1344 FOR_EACH_EDGE (e, ei, from->preds)
1345 bitmap_set_bit (from_preds, e->src->index);
1346 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1347 BITMAP_FREE (from_preds);
1349 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1352 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1353 replacement bb) and vice versa maintains the invariant that uses in the
1354 replacement are dominates by their defs. */
1356 static bool
1357 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1359 if (BB_CLUSTER (bb1) != NULL)
1360 bb1 = BB_CLUSTER (bb1)->rep_bb;
1362 if (BB_CLUSTER (bb2) != NULL)
1363 bb2 = BB_CLUSTER (bb2)->rep_bb;
1365 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1366 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1369 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1371 static void
1372 find_clusters_1 (same_succ same_succ)
1374 basic_block bb1, bb2;
1375 unsigned int i, j;
1376 bitmap_iterator bi, bj;
1377 int nr_comparisons;
1378 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1380 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1382 bb1 = BASIC_BLOCK (i);
1384 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1385 phi-nodes in bb1 and bb2, with the same alternatives for the same
1386 preds. */
1387 if (bb_has_non_vop_phi (bb1))
1388 continue;
1390 nr_comparisons = 0;
1391 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1393 bb2 = BASIC_BLOCK (j);
1395 if (bb_has_non_vop_phi (bb2))
1396 continue;
1398 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1399 continue;
1401 /* Limit quadratic behaviour. */
1402 nr_comparisons++;
1403 if (nr_comparisons > max_comparisons)
1404 break;
1406 /* This is a conservative dependency check. We could test more
1407 precise for allowed replacement direction. */
1408 if (!deps_ok_for_redirect (bb1, bb2))
1409 continue;
1411 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1412 continue;
1414 find_duplicate (same_succ, bb1, bb2);
1419 /* Find clusters of bbs which can be merged. */
1421 static void
1422 find_clusters (void)
1424 same_succ same;
1426 while (!VEC_empty (same_succ, worklist))
1428 same = VEC_pop (same_succ, worklist);
1429 same->in_worklist = false;
1430 if (dump_file && (dump_flags & TDF_DETAILS))
1432 fprintf (dump_file, "processing worklist entry\n");
1433 same_succ_print (dump_file, same);
1435 find_clusters_1 (same);
1439 /* Returns the vop phi of BB, if any. */
1441 static gimple
1442 vop_phi (basic_block bb)
1444 gimple stmt;
1445 gimple_stmt_iterator gsi;
1446 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1448 stmt = gsi_stmt (gsi);
1449 if (! virtual_operand_p (gimple_phi_result (stmt)))
1450 continue;
1451 return stmt;
1453 return NULL;
1456 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1458 static void
1459 replace_block_by (basic_block bb1, basic_block bb2)
1461 edge pred_edge;
1462 unsigned int i;
1463 gimple bb2_phi;
1465 bb2_phi = vop_phi (bb2);
1467 /* Mark the basic block as deleted. */
1468 mark_basic_block_deleted (bb1);
1470 /* Redirect the incoming edges of bb1 to bb2. */
1471 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1473 pred_edge = EDGE_PRED (bb1, i - 1);
1474 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1475 gcc_assert (pred_edge != NULL);
1477 if (bb2_phi == NULL)
1478 continue;
1480 /* The phi might have run out of capacity when the redirect added an
1481 argument, which means it could have been replaced. Refresh it. */
1482 bb2_phi = vop_phi (bb2);
1484 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1485 pred_edge, UNKNOWN_LOCATION);
1488 bb2->frequency += bb1->frequency;
1489 if (bb2->frequency > BB_FREQ_MAX)
1490 bb2->frequency = BB_FREQ_MAX;
1492 bb2->count += bb1->count;
1494 /* Do updates that use bb1, before deleting bb1. */
1495 release_last_vdef (bb1);
1496 same_succ_flush_bb (bb1);
1498 delete_basic_block (bb1);
1501 /* Bbs for which update_debug_stmt need to be called. */
1503 static bitmap update_bbs;
1505 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1506 number of bbs removed. */
1508 static int
1509 apply_clusters (void)
1511 basic_block bb1, bb2;
1512 bb_cluster c;
1513 unsigned int i, j;
1514 bitmap_iterator bj;
1515 int nr_bbs_removed = 0;
1517 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1519 c = VEC_index (bb_cluster, all_clusters, i);
1520 if (c == NULL)
1521 continue;
1523 bb2 = c->rep_bb;
1524 bitmap_set_bit (update_bbs, bb2->index);
1526 bitmap_clear_bit (c->bbs, bb2->index);
1527 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1529 bb1 = BASIC_BLOCK (j);
1530 bitmap_clear_bit (update_bbs, bb1->index);
1532 replace_block_by (bb1, bb2);
1533 nr_bbs_removed++;
1537 return nr_bbs_removed;
1540 /* Resets debug statement STMT if it has uses that are not dominated by their
1541 defs. */
1543 static void
1544 update_debug_stmt (gimple stmt)
1546 use_operand_p use_p;
1547 ssa_op_iter oi;
1548 basic_block bbdef, bbuse;
1549 gimple def_stmt;
1550 tree name;
1552 if (!gimple_debug_bind_p (stmt))
1553 return;
1555 bbuse = gimple_bb (stmt);
1556 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1558 name = USE_FROM_PTR (use_p);
1559 gcc_assert (TREE_CODE (name) == SSA_NAME);
1561 def_stmt = SSA_NAME_DEF_STMT (name);
1562 gcc_assert (def_stmt != NULL);
1564 bbdef = gimple_bb (def_stmt);
1565 if (bbdef == NULL || bbuse == bbdef
1566 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1567 continue;
1569 gimple_debug_bind_reset_value (stmt);
1570 update_stmt (stmt);
1574 /* Resets all debug statements that have uses that are not
1575 dominated by their defs. */
1577 static void
1578 update_debug_stmts (void)
1580 basic_block bb;
1581 bitmap_iterator bi;
1582 unsigned int i;
1584 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1586 gimple stmt;
1587 gimple_stmt_iterator gsi;
1589 bb = BASIC_BLOCK (i);
1590 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1592 stmt = gsi_stmt (gsi);
1593 if (!is_gimple_debug (stmt))
1594 continue;
1595 update_debug_stmt (stmt);
1600 /* Runs tail merge optimization. */
1602 unsigned int
1603 tail_merge_optimize (unsigned int todo)
1605 int nr_bbs_removed_total = 0;
1606 int nr_bbs_removed;
1607 bool loop_entered = false;
1608 int iteration_nr = 0;
1609 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1611 if (!flag_tree_tail_merge || max_iterations == 0)
1612 return 0;
1614 timevar_push (TV_TREE_TAIL_MERGE);
1616 if (!dom_info_available_p (CDI_DOMINATORS))
1618 /* PRE can leave us with unreachable blocks, remove them now. */
1619 delete_unreachable_blocks ();
1620 calculate_dominance_info (CDI_DOMINATORS);
1622 init_worklist ();
1624 while (!VEC_empty (same_succ, worklist))
1626 if (!loop_entered)
1628 loop_entered = true;
1629 alloc_cluster_vectors ();
1630 update_bbs = BITMAP_ALLOC (NULL);
1632 else
1633 reset_cluster_vectors ();
1635 iteration_nr++;
1636 if (dump_file && (dump_flags & TDF_DETAILS))
1637 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1639 find_clusters ();
1640 gcc_assert (VEC_empty (same_succ, worklist));
1641 if (VEC_empty (bb_cluster, all_clusters))
1642 break;
1644 nr_bbs_removed = apply_clusters ();
1645 nr_bbs_removed_total += nr_bbs_removed;
1646 if (nr_bbs_removed == 0)
1647 break;
1649 free_dominance_info (CDI_DOMINATORS);
1651 if (iteration_nr == max_iterations)
1652 break;
1654 calculate_dominance_info (CDI_DOMINATORS);
1655 update_worklist ();
1658 if (dump_file && (dump_flags & TDF_DETAILS))
1659 fprintf (dump_file, "htab collision / search: %f\n",
1660 same_succ_htab.collisions ());
1662 if (nr_bbs_removed_total > 0)
1664 if (MAY_HAVE_DEBUG_STMTS)
1666 calculate_dominance_info (CDI_DOMINATORS);
1667 update_debug_stmts ();
1670 if (dump_file && (dump_flags & TDF_DETAILS))
1672 fprintf (dump_file, "Before TODOs.\n");
1673 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1676 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
1677 mark_virtual_operands_for_renaming (cfun);
1680 delete_worklist ();
1681 if (loop_entered)
1683 delete_cluster_vectors ();
1684 BITMAP_FREE (update_bbs);
1687 timevar_pop (TV_TREE_TAIL_MERGE);
1689 return todo;