/cp
[official-gcc.git] / gcc / tree-ssa-tail-merge.c
blobf6b1ba0815472727ac7fea2f7e452a9d45308a75
1 /* Tail merging for gimple.
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Pass overview.
24 MOTIVATIONAL EXAMPLE
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
110 CONTEXT
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
157 IMPLEMENTATION
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
167 LIMITATIONS/TODO
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis, We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
179 PASS PLACEMENT
180 This 'pass' is not a stand-alone gimple pass, but runs as part of
181 pass_pre, in order to share the value numbering.
184 SWITCHES
186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "tm.h"
192 #include "tree.h"
193 #include "stor-layout.h"
194 #include "trans-mem.h"
195 #include "tm_p.h"
196 #include "basic-block.h"
197 #include "flags.h"
198 #include "function.h"
199 #include "hash-table.h"
200 #include "tree-ssa-alias.h"
201 #include "internal-fn.h"
202 #include "tree-eh.h"
203 #include "gimple-expr.h"
204 #include "is-a.h"
205 #include "gimple.h"
206 #include "gimple-iterator.h"
207 #include "gimple-ssa.h"
208 #include "tree-cfg.h"
209 #include "tree-phinodes.h"
210 #include "ssa-iterators.h"
211 #include "tree-into-ssa.h"
212 #include "params.h"
213 #include "gimple-pretty-print.h"
214 #include "tree-ssa-sccvn.h"
215 #include "tree-dump.h"
216 #include "cfgloop.h"
217 #include "tree-pass.h"
218 #include "trans-mem.h"
220 /* Describes a group of bbs with the same successors. The successor bbs are
221 cached in succs, and the successor edge flags are cached in succ_flags.
222 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
223 it's marked in inverse.
224 Additionally, the hash value for the struct is cached in hashval, and
225 in_worklist indicates whether it's currently part of worklist. */
227 struct same_succ_def
229 /* The bbs that have the same successor bbs. */
230 bitmap bbs;
231 /* The successor bbs. */
232 bitmap succs;
233 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
234 bb. */
235 bitmap inverse;
236 /* The edge flags for each of the successor bbs. */
237 vec<int> succ_flags;
238 /* Indicates whether the struct is currently in the worklist. */
239 bool in_worklist;
240 /* The hash value of the struct. */
241 hashval_t hashval;
243 /* hash_table support. */
244 typedef same_succ_def value_type;
245 typedef same_succ_def compare_type;
246 static inline hashval_t hash (const value_type *);
247 static int equal (const value_type *, const compare_type *);
248 static void remove (value_type *);
250 typedef struct same_succ_def *same_succ;
251 typedef const struct same_succ_def *const_same_succ;
253 /* hash routine for hash_table support, returns hashval of E. */
255 inline hashval_t
256 same_succ_def::hash (const value_type *e)
258 return e->hashval;
261 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
263 struct bb_cluster_def
265 /* The bbs in the cluster. */
266 bitmap bbs;
267 /* The preds of the bbs in the cluster. */
268 bitmap preds;
269 /* Index in all_clusters vector. */
270 int index;
271 /* The bb to replace the cluster with. */
272 basic_block rep_bb;
274 typedef struct bb_cluster_def *bb_cluster;
275 typedef const struct bb_cluster_def *const_bb_cluster;
277 /* Per bb-info. */
279 struct aux_bb_info
281 /* The number of non-debug statements in the bb. */
282 int size;
283 /* The same_succ that this bb is a member of. */
284 same_succ bb_same_succ;
285 /* The cluster that this bb is a member of. */
286 bb_cluster cluster;
287 /* The vop state at the exit of a bb. This is shortlived data, used to
288 communicate data between update_block_by and update_vuses. */
289 tree vop_at_exit;
290 /* The bb that either contains or is dominated by the dependencies of the
291 bb. */
292 basic_block dep_bb;
295 /* Macros to access the fields of struct aux_bb_info. */
297 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
298 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
299 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
300 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
301 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
303 /* Returns true if the only effect a statement STMT has, is to define locally
304 used SSA_NAMEs. */
306 static bool
307 stmt_local_def (gimple stmt)
309 basic_block bb, def_bb;
310 imm_use_iterator iter;
311 use_operand_p use_p;
312 tree val;
313 def_operand_p def_p;
315 if (gimple_has_side_effects (stmt)
316 || stmt_could_throw_p (stmt)
317 || gimple_vdef (stmt) != NULL_TREE)
318 return false;
320 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
321 if (def_p == NULL)
322 return false;
324 val = DEF_FROM_PTR (def_p);
325 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
326 return false;
328 def_bb = gimple_bb (stmt);
330 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
334 bb = gimple_bb (USE_STMT (use_p));
335 if (bb == def_bb)
336 continue;
338 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
339 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
340 continue;
342 return false;
345 return true;
348 /* Let GSI skip forwards over local defs. */
350 static void
351 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
353 gimple stmt;
355 while (true)
357 if (gsi_end_p (*gsi))
358 return;
359 stmt = gsi_stmt (*gsi);
360 if (!stmt_local_def (stmt))
361 return;
362 gsi_next_nondebug (gsi);
366 /* VAL1 and VAL2 are either:
367 - uses in BB1 and BB2, or
368 - phi alternatives for BB1 and BB2.
369 Return true if the uses have the same gvn value. */
371 static bool
372 gvn_uses_equal (tree val1, tree val2)
374 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
376 if (val1 == val2)
377 return true;
379 if (vn_valueize (val1) != vn_valueize (val2))
380 return false;
382 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
383 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
386 /* Prints E to FILE. */
388 static void
389 same_succ_print (FILE *file, const same_succ e)
391 unsigned int i;
392 bitmap_print (file, e->bbs, "bbs:", "\n");
393 bitmap_print (file, e->succs, "succs:", "\n");
394 bitmap_print (file, e->inverse, "inverse:", "\n");
395 fprintf (file, "flags:");
396 for (i = 0; i < e->succ_flags.length (); ++i)
397 fprintf (file, " %x", e->succ_flags[i]);
398 fprintf (file, "\n");
401 /* Prints same_succ VE to VFILE. */
403 inline int
404 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
406 const same_succ e = *pe;
407 same_succ_print (file, e);
408 return 1;
411 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
413 static void
414 update_dep_bb (basic_block use_bb, tree val)
416 basic_block dep_bb;
418 /* Not a dep. */
419 if (TREE_CODE (val) != SSA_NAME)
420 return;
422 /* Skip use of global def. */
423 if (SSA_NAME_IS_DEFAULT_DEF (val))
424 return;
426 /* Skip use of local def. */
427 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
428 if (dep_bb == use_bb)
429 return;
431 if (BB_DEP_BB (use_bb) == NULL
432 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
433 BB_DEP_BB (use_bb) = dep_bb;
436 /* Update BB_DEP_BB, given the dependencies in STMT. */
438 static void
439 stmt_update_dep_bb (gimple stmt)
441 ssa_op_iter iter;
442 use_operand_p use;
444 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
445 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
448 /* Calculates hash value for same_succ VE. */
450 static hashval_t
451 same_succ_hash (const_same_succ e)
453 hashval_t hashval = bitmap_hash (e->succs);
454 int flags;
455 unsigned int i;
456 unsigned int first = bitmap_first_set_bit (e->bbs);
457 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
458 int size = 0;
459 gimple_stmt_iterator gsi;
460 gimple stmt;
461 tree arg;
462 unsigned int s;
463 bitmap_iterator bs;
465 for (gsi = gsi_start_nondebug_bb (bb);
466 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
468 stmt = gsi_stmt (gsi);
469 stmt_update_dep_bb (stmt);
470 if (stmt_local_def (stmt))
471 continue;
472 size++;
474 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
475 if (is_gimple_assign (stmt))
476 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
477 hashval);
478 if (!is_gimple_call (stmt))
479 continue;
480 if (gimple_call_internal_p (stmt))
481 hashval = iterative_hash_hashval_t
482 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
483 else
484 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
485 for (i = 0; i < gimple_call_num_args (stmt); i++)
487 arg = gimple_call_arg (stmt, i);
488 arg = vn_valueize (arg);
489 hashval = iterative_hash_expr (arg, hashval);
493 hashval = iterative_hash_hashval_t (size, hashval);
494 BB_SIZE (bb) = size;
496 for (i = 0; i < e->succ_flags.length (); ++i)
498 flags = e->succ_flags[i];
499 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
500 hashval = iterative_hash_hashval_t (flags, hashval);
503 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
505 int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
506 for (gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s)); !gsi_end_p (gsi);
507 gsi_next (&gsi))
509 gimple phi = gsi_stmt (gsi);
510 tree lhs = gimple_phi_result (phi);
511 tree val = gimple_phi_arg_def (phi, n);
513 if (virtual_operand_p (lhs))
514 continue;
515 update_dep_bb (bb, val);
519 return hashval;
522 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
523 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
524 the other edge flags. */
526 static bool
527 inverse_flags (const_same_succ e1, const_same_succ e2)
529 int f1a, f1b, f2a, f2b;
530 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
532 if (e1->succ_flags.length () != 2)
533 return false;
535 f1a = e1->succ_flags[0];
536 f1b = e1->succ_flags[1];
537 f2a = e2->succ_flags[0];
538 f2b = e2->succ_flags[1];
540 if (f1a == f2a && f1b == f2b)
541 return false;
543 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
546 /* Compares SAME_SUCCs E1 and E2. */
549 same_succ_def::equal (const value_type *e1, const compare_type *e2)
551 unsigned int i, first1, first2;
552 gimple_stmt_iterator gsi1, gsi2;
553 gimple s1, s2;
554 basic_block bb1, bb2;
556 if (e1->hashval != e2->hashval)
557 return 0;
559 if (e1->succ_flags.length () != e2->succ_flags.length ())
560 return 0;
562 if (!bitmap_equal_p (e1->succs, e2->succs))
563 return 0;
565 if (!inverse_flags (e1, e2))
567 for (i = 0; i < e1->succ_flags.length (); ++i)
568 if (e1->succ_flags[i] != e1->succ_flags[i])
569 return 0;
572 first1 = bitmap_first_set_bit (e1->bbs);
573 first2 = bitmap_first_set_bit (e2->bbs);
575 bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
576 bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
578 if (BB_SIZE (bb1) != BB_SIZE (bb2))
579 return 0;
581 gsi1 = gsi_start_nondebug_bb (bb1);
582 gsi2 = gsi_start_nondebug_bb (bb2);
583 gsi_advance_fw_nondebug_nonlocal (&gsi1);
584 gsi_advance_fw_nondebug_nonlocal (&gsi2);
585 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
587 s1 = gsi_stmt (gsi1);
588 s2 = gsi_stmt (gsi2);
589 if (gimple_code (s1) != gimple_code (s2))
590 return 0;
591 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
592 return 0;
593 gsi_next_nondebug (&gsi1);
594 gsi_next_nondebug (&gsi2);
595 gsi_advance_fw_nondebug_nonlocal (&gsi1);
596 gsi_advance_fw_nondebug_nonlocal (&gsi2);
599 return 1;
602 /* Alloc and init a new SAME_SUCC. */
604 static same_succ
605 same_succ_alloc (void)
607 same_succ same = XNEW (struct same_succ_def);
609 same->bbs = BITMAP_ALLOC (NULL);
610 same->succs = BITMAP_ALLOC (NULL);
611 same->inverse = BITMAP_ALLOC (NULL);
612 same->succ_flags.create (10);
613 same->in_worklist = false;
615 return same;
618 /* Delete same_succ E. */
620 void
621 same_succ_def::remove (same_succ e)
623 BITMAP_FREE (e->bbs);
624 BITMAP_FREE (e->succs);
625 BITMAP_FREE (e->inverse);
626 e->succ_flags.release ();
628 XDELETE (e);
631 /* Reset same_succ SAME. */
633 static void
634 same_succ_reset (same_succ same)
636 bitmap_clear (same->bbs);
637 bitmap_clear (same->succs);
638 bitmap_clear (same->inverse);
639 same->succ_flags.truncate (0);
642 static hash_table <same_succ_def> same_succ_htab;
644 /* Array that is used to store the edge flags for a successor. */
646 static int *same_succ_edge_flags;
648 /* Bitmap that is used to mark bbs that are recently deleted. */
650 static bitmap deleted_bbs;
652 /* Bitmap that is used to mark predecessors of bbs that are
653 deleted. */
655 static bitmap deleted_bb_preds;
657 /* Prints same_succ_htab to stderr. */
659 extern void debug_same_succ (void);
660 DEBUG_FUNCTION void
661 debug_same_succ ( void)
663 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
667 /* Vector of bbs to process. */
669 static vec<same_succ> worklist;
671 /* Prints worklist to FILE. */
673 static void
674 print_worklist (FILE *file)
676 unsigned int i;
677 for (i = 0; i < worklist.length (); ++i)
678 same_succ_print (file, worklist[i]);
681 /* Adds SAME to worklist. */
683 static void
684 add_to_worklist (same_succ same)
686 if (same->in_worklist)
687 return;
689 if (bitmap_count_bits (same->bbs) < 2)
690 return;
692 same->in_worklist = true;
693 worklist.safe_push (same);
696 /* Add BB to same_succ_htab. */
698 static void
699 find_same_succ_bb (basic_block bb, same_succ *same_p)
701 unsigned int j;
702 bitmap_iterator bj;
703 same_succ same = *same_p;
704 same_succ *slot;
705 edge_iterator ei;
706 edge e;
708 if (bb == NULL
709 /* Be conservative with loop structure. It's not evident that this test
710 is sufficient. Before tail-merge, we've just called
711 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
712 set, so there's no guarantee that the loop->latch value is still valid.
713 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
714 start of pre, we've kept that property intact throughout pre, and are
715 keeping it throughout tail-merge using this test. */
716 || bb->loop_father->latch == bb)
717 return;
718 bitmap_set_bit (same->bbs, bb->index);
719 FOR_EACH_EDGE (e, ei, bb->succs)
721 int index = e->dest->index;
722 bitmap_set_bit (same->succs, index);
723 same_succ_edge_flags[index] = e->flags;
725 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
726 same->succ_flags.safe_push (same_succ_edge_flags[j]);
728 same->hashval = same_succ_hash (same);
730 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
731 if (*slot == NULL)
733 *slot = same;
734 BB_SAME_SUCC (bb) = same;
735 add_to_worklist (same);
736 *same_p = NULL;
738 else
740 bitmap_set_bit ((*slot)->bbs, bb->index);
741 BB_SAME_SUCC (bb) = *slot;
742 add_to_worklist (*slot);
743 if (inverse_flags (same, *slot))
744 bitmap_set_bit ((*slot)->inverse, bb->index);
745 same_succ_reset (same);
749 /* Find bbs with same successors. */
751 static void
752 find_same_succ (void)
754 same_succ same = same_succ_alloc ();
755 basic_block bb;
757 FOR_EACH_BB_FN (bb, cfun)
759 find_same_succ_bb (bb, &same);
760 if (same == NULL)
761 same = same_succ_alloc ();
764 same_succ_def::remove (same);
767 /* Initializes worklist administration. */
769 static void
770 init_worklist (void)
772 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
773 same_succ_htab.create (n_basic_blocks_for_fn (cfun));
774 same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
775 deleted_bbs = BITMAP_ALLOC (NULL);
776 deleted_bb_preds = BITMAP_ALLOC (NULL);
777 worklist.create (n_basic_blocks_for_fn (cfun));
778 find_same_succ ();
780 if (dump_file && (dump_flags & TDF_DETAILS))
782 fprintf (dump_file, "initial worklist:\n");
783 print_worklist (dump_file);
787 /* Deletes worklist administration. */
789 static void
790 delete_worklist (void)
792 free_aux_for_blocks ();
793 same_succ_htab.dispose ();
794 XDELETEVEC (same_succ_edge_flags);
795 same_succ_edge_flags = NULL;
796 BITMAP_FREE (deleted_bbs);
797 BITMAP_FREE (deleted_bb_preds);
798 worklist.release ();
801 /* Mark BB as deleted, and mark its predecessors. */
803 static void
804 mark_basic_block_deleted (basic_block bb)
806 edge e;
807 edge_iterator ei;
809 bitmap_set_bit (deleted_bbs, bb->index);
811 FOR_EACH_EDGE (e, ei, bb->preds)
812 bitmap_set_bit (deleted_bb_preds, e->src->index);
815 /* Removes BB from its corresponding same_succ. */
817 static void
818 same_succ_flush_bb (basic_block bb)
820 same_succ same = BB_SAME_SUCC (bb);
821 BB_SAME_SUCC (bb) = NULL;
822 if (bitmap_single_bit_set_p (same->bbs))
823 same_succ_htab.remove_elt_with_hash (same, same->hashval);
824 else
825 bitmap_clear_bit (same->bbs, bb->index);
828 /* Removes all bbs in BBS from their corresponding same_succ. */
830 static void
831 same_succ_flush_bbs (bitmap bbs)
833 unsigned int i;
834 bitmap_iterator bi;
836 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
837 same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
840 /* Release the last vdef in BB, either normal or phi result. */
842 static void
843 release_last_vdef (basic_block bb)
845 gimple_stmt_iterator i;
847 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
849 gimple stmt = gsi_stmt (i);
850 if (gimple_vdef (stmt) == NULL_TREE)
851 continue;
853 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
854 return;
857 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
859 gimple phi = gsi_stmt (i);
860 tree res = gimple_phi_result (phi);
862 if (!virtual_operand_p (res))
863 continue;
865 mark_virtual_phi_result_for_renaming (phi);
866 return;
871 /* For deleted_bb_preds, find bbs with same successors. */
873 static void
874 update_worklist (void)
876 unsigned int i;
877 bitmap_iterator bi;
878 basic_block bb;
879 same_succ same;
881 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
882 bitmap_clear (deleted_bbs);
884 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
885 same_succ_flush_bbs (deleted_bb_preds);
887 same = same_succ_alloc ();
888 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
890 bb = BASIC_BLOCK_FOR_FN (cfun, i);
891 gcc_assert (bb != NULL);
892 find_same_succ_bb (bb, &same);
893 if (same == NULL)
894 same = same_succ_alloc ();
896 same_succ_def::remove (same);
897 bitmap_clear (deleted_bb_preds);
900 /* Prints cluster C to FILE. */
902 static void
903 print_cluster (FILE *file, bb_cluster c)
905 if (c == NULL)
906 return;
907 bitmap_print (file, c->bbs, "bbs:", "\n");
908 bitmap_print (file, c->preds, "preds:", "\n");
911 /* Prints cluster C to stderr. */
913 extern void debug_cluster (bb_cluster);
914 DEBUG_FUNCTION void
915 debug_cluster (bb_cluster c)
917 print_cluster (stderr, c);
920 /* Update C->rep_bb, given that BB is added to the cluster. */
922 static void
923 update_rep_bb (bb_cluster c, basic_block bb)
925 /* Initial. */
926 if (c->rep_bb == NULL)
928 c->rep_bb = bb;
929 return;
932 /* Current needs no deps, keep it. */
933 if (BB_DEP_BB (c->rep_bb) == NULL)
934 return;
936 /* Bb needs no deps, change rep_bb. */
937 if (BB_DEP_BB (bb) == NULL)
939 c->rep_bb = bb;
940 return;
943 /* Bb needs last deps earlier than current, change rep_bb. A potential
944 problem with this, is that the first deps might also be earlier, which
945 would mean we prefer longer lifetimes for the deps. To be able to check
946 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
947 BB_DEP_BB, which is really BB_LAST_DEP_BB.
948 The benefit of choosing the bb with last deps earlier, is that it can
949 potentially be used as replacement for more bbs. */
950 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
951 c->rep_bb = bb;
954 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
956 static void
957 add_bb_to_cluster (bb_cluster c, basic_block bb)
959 edge e;
960 edge_iterator ei;
962 bitmap_set_bit (c->bbs, bb->index);
964 FOR_EACH_EDGE (e, ei, bb->preds)
965 bitmap_set_bit (c->preds, e->src->index);
967 update_rep_bb (c, bb);
970 /* Allocate and init new cluster. */
972 static bb_cluster
973 new_cluster (void)
975 bb_cluster c;
976 c = XCNEW (struct bb_cluster_def);
977 c->bbs = BITMAP_ALLOC (NULL);
978 c->preds = BITMAP_ALLOC (NULL);
979 c->rep_bb = NULL;
980 return c;
983 /* Delete clusters. */
985 static void
986 delete_cluster (bb_cluster c)
988 if (c == NULL)
989 return;
990 BITMAP_FREE (c->bbs);
991 BITMAP_FREE (c->preds);
992 XDELETE (c);
996 /* Array that contains all clusters. */
998 static vec<bb_cluster> all_clusters;
1000 /* Allocate all cluster vectors. */
1002 static void
1003 alloc_cluster_vectors (void)
1005 all_clusters.create (n_basic_blocks_for_fn (cfun));
1008 /* Reset all cluster vectors. */
1010 static void
1011 reset_cluster_vectors (void)
1013 unsigned int i;
1014 basic_block bb;
1015 for (i = 0; i < all_clusters.length (); ++i)
1016 delete_cluster (all_clusters[i]);
1017 all_clusters.truncate (0);
1018 FOR_EACH_BB_FN (bb, cfun)
1019 BB_CLUSTER (bb) = NULL;
1022 /* Delete all cluster vectors. */
1024 static void
1025 delete_cluster_vectors (void)
1027 unsigned int i;
1028 for (i = 0; i < all_clusters.length (); ++i)
1029 delete_cluster (all_clusters[i]);
1030 all_clusters.release ();
1033 /* Merge cluster C2 into C1. */
1035 static void
1036 merge_clusters (bb_cluster c1, bb_cluster c2)
1038 bitmap_ior_into (c1->bbs, c2->bbs);
1039 bitmap_ior_into (c1->preds, c2->preds);
1042 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1043 all_clusters, or merge c with existing cluster. */
1045 static void
1046 set_cluster (basic_block bb1, basic_block bb2)
1048 basic_block merge_bb, other_bb;
1049 bb_cluster merge, old, c;
1051 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1053 c = new_cluster ();
1054 add_bb_to_cluster (c, bb1);
1055 add_bb_to_cluster (c, bb2);
1056 BB_CLUSTER (bb1) = c;
1057 BB_CLUSTER (bb2) = c;
1058 c->index = all_clusters.length ();
1059 all_clusters.safe_push (c);
1061 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1063 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1064 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1065 merge = BB_CLUSTER (merge_bb);
1066 add_bb_to_cluster (merge, other_bb);
1067 BB_CLUSTER (other_bb) = merge;
1069 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1071 unsigned int i;
1072 bitmap_iterator bi;
1074 old = BB_CLUSTER (bb2);
1075 merge = BB_CLUSTER (bb1);
1076 merge_clusters (merge, old);
1077 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1078 BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1079 all_clusters[old->index] = NULL;
1080 update_rep_bb (merge, old->rep_bb);
1081 delete_cluster (old);
1083 else
1084 gcc_unreachable ();
1087 /* Return true if gimple operands T1 and T2 have the same value. */
1089 static bool
1090 gimple_operand_equal_value_p (tree t1, tree t2)
1092 if (t1 == t2)
1093 return true;
1095 if (t1 == NULL_TREE
1096 || t2 == NULL_TREE)
1097 return false;
1099 if (operand_equal_p (t1, t2, 0))
1100 return true;
1102 return gvn_uses_equal (t1, t2);
1105 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1106 gimple_bb (s2) are members of SAME_SUCC. */
1108 static bool
1109 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1111 unsigned int i;
1112 tree lhs1, lhs2;
1113 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1114 tree t1, t2;
1115 bool inv_cond;
1116 enum tree_code code1, code2;
1118 if (gimple_code (s1) != gimple_code (s2))
1119 return false;
1121 switch (gimple_code (s1))
1123 case GIMPLE_CALL:
1124 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1125 return false;
1126 if (!gimple_call_same_target_p (s1, s2))
1127 return false;
1129 for (i = 0; i < gimple_call_num_args (s1); ++i)
1131 t1 = gimple_call_arg (s1, i);
1132 t2 = gimple_call_arg (s2, i);
1133 if (gimple_operand_equal_value_p (t1, t2))
1134 continue;
1135 return false;
1138 lhs1 = gimple_get_lhs (s1);
1139 lhs2 = gimple_get_lhs (s2);
1140 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1141 return true;
1142 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1143 return false;
1144 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1145 return vn_valueize (lhs1) == vn_valueize (lhs2);
1146 return operand_equal_p (lhs1, lhs2, 0);
1148 case GIMPLE_ASSIGN:
1149 lhs1 = gimple_get_lhs (s1);
1150 lhs2 = gimple_get_lhs (s2);
1151 if (TREE_CODE (lhs1) != SSA_NAME
1152 && TREE_CODE (lhs2) != SSA_NAME)
1154 /* If the vdef is the same, it's the same statement. */
1155 if (vn_valueize (gimple_vdef (s1))
1156 == vn_valueize (gimple_vdef (s2)))
1157 return true;
1159 /* Test for structural equality. */
1160 return (operand_equal_p (lhs1, lhs2, 0)
1161 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1162 gimple_assign_rhs1 (s2)));
1164 else if (TREE_CODE (lhs1) == SSA_NAME
1165 && TREE_CODE (lhs2) == SSA_NAME)
1166 return vn_valueize (lhs1) == vn_valueize (lhs2);
1167 return false;
1169 case GIMPLE_COND:
1170 t1 = gimple_cond_lhs (s1);
1171 t2 = gimple_cond_lhs (s2);
1172 if (!gimple_operand_equal_value_p (t1, t2))
1173 return false;
1175 t1 = gimple_cond_rhs (s1);
1176 t2 = gimple_cond_rhs (s2);
1177 if (!gimple_operand_equal_value_p (t1, t2))
1178 return false;
1180 code1 = gimple_expr_code (s1);
1181 code2 = gimple_expr_code (s2);
1182 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1183 != bitmap_bit_p (same_succ->inverse, bb2->index));
1184 if (inv_cond)
1186 bool honor_nans
1187 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1188 code2 = invert_tree_comparison (code2, honor_nans);
1190 return code1 == code2;
1192 default:
1193 return false;
1197 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1198 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1199 processed statements. */
1201 static void
1202 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1203 bool *vuse_escaped)
1205 gimple stmt;
1206 tree lvuse;
1208 while (true)
1210 if (gsi_end_p (*gsi))
1211 return;
1212 stmt = gsi_stmt (*gsi);
1214 lvuse = gimple_vuse (stmt);
1215 if (lvuse != NULL_TREE)
1217 *vuse = lvuse;
1218 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1219 *vuse_escaped = true;
1222 if (!stmt_local_def (stmt))
1223 return;
1224 gsi_prev_nondebug (gsi);
1228 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1229 clusters them. */
1231 static void
1232 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1234 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1235 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1236 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1237 bool vuse_escaped = false;
1239 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1240 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1242 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1244 gimple stmt1 = gsi_stmt (gsi1);
1245 gimple stmt2 = gsi_stmt (gsi2);
1247 /* What could be better than to this this here is to blacklist the bb
1248 containing the stmt, when encountering the stmt f.i. in
1249 same_succ_hash. */
1250 if (is_tm_ending (stmt1)
1251 || is_tm_ending (stmt2))
1252 return;
1254 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1255 return;
1257 gsi_prev_nondebug (&gsi1);
1258 gsi_prev_nondebug (&gsi2);
1259 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1260 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1263 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1264 return;
1266 /* If the incoming vuses are not the same, and the vuse escaped into an
1267 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1268 which potentially means the semantics of one of the blocks will be changed.
1269 TODO: make this check more precise. */
1270 if (vuse_escaped && vuse1 != vuse2)
1271 return;
1273 if (dump_file)
1274 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1275 bb1->index, bb2->index);
1277 set_cluster (bb1, bb2);
1280 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1281 E2 are equal. */
1283 static bool
1284 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1286 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1287 gimple_stmt_iterator gsi;
1289 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1291 gimple phi = gsi_stmt (gsi);
1292 tree lhs = gimple_phi_result (phi);
1293 tree val1 = gimple_phi_arg_def (phi, n1);
1294 tree val2 = gimple_phi_arg_def (phi, n2);
1296 if (virtual_operand_p (lhs))
1297 continue;
1299 if (operand_equal_for_phi_arg_p (val1, val2))
1300 continue;
1301 if (gvn_uses_equal (val1, val2))
1302 continue;
1304 return false;
1307 return true;
1310 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1311 phi alternatives for BB1 and BB2 are equal. */
1313 static bool
1314 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1316 unsigned int s;
1317 bitmap_iterator bs;
1318 edge e1, e2;
1319 basic_block succ;
1321 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1323 succ = BASIC_BLOCK_FOR_FN (cfun, s);
1324 e1 = find_edge (bb1, succ);
1325 e2 = find_edge (bb2, succ);
1326 if (e1->flags & EDGE_COMPLEX
1327 || e2->flags & EDGE_COMPLEX)
1328 return false;
1330 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1331 the same value. */
1332 if (!same_phi_alternatives_1 (succ, e1, e2))
1333 return false;
1336 return true;
1339 /* Return true if BB has non-vop phis. */
1341 static bool
1342 bb_has_non_vop_phi (basic_block bb)
1344 gimple_seq phis = phi_nodes (bb);
1345 gimple phi;
1347 if (phis == NULL)
1348 return false;
1350 if (!gimple_seq_singleton_p (phis))
1351 return true;
1353 phi = gimple_seq_first_stmt (phis);
1354 return !virtual_operand_p (gimple_phi_result (phi));
1357 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1358 invariant that uses in FROM are dominates by their defs. */
1360 static bool
1361 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1363 basic_block cd, dep_bb = BB_DEP_BB (to);
1364 edge_iterator ei;
1365 edge e;
1366 bitmap from_preds = BITMAP_ALLOC (NULL);
1368 if (dep_bb == NULL)
1369 return true;
1371 FOR_EACH_EDGE (e, ei, from->preds)
1372 bitmap_set_bit (from_preds, e->src->index);
1373 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1374 BITMAP_FREE (from_preds);
1376 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1379 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1380 replacement bb) and vice versa maintains the invariant that uses in the
1381 replacement are dominates by their defs. */
1383 static bool
1384 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1386 if (BB_CLUSTER (bb1) != NULL)
1387 bb1 = BB_CLUSTER (bb1)->rep_bb;
1389 if (BB_CLUSTER (bb2) != NULL)
1390 bb2 = BB_CLUSTER (bb2)->rep_bb;
1392 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1393 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1396 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1398 static void
1399 find_clusters_1 (same_succ same_succ)
1401 basic_block bb1, bb2;
1402 unsigned int i, j;
1403 bitmap_iterator bi, bj;
1404 int nr_comparisons;
1405 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1407 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1409 bb1 = BASIC_BLOCK_FOR_FN (cfun, i);
1411 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1412 phi-nodes in bb1 and bb2, with the same alternatives for the same
1413 preds. */
1414 if (bb_has_non_vop_phi (bb1))
1415 continue;
1417 nr_comparisons = 0;
1418 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1420 bb2 = BASIC_BLOCK_FOR_FN (cfun, j);
1422 if (bb_has_non_vop_phi (bb2))
1423 continue;
1425 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1426 continue;
1428 /* Limit quadratic behaviour. */
1429 nr_comparisons++;
1430 if (nr_comparisons > max_comparisons)
1431 break;
1433 /* This is a conservative dependency check. We could test more
1434 precise for allowed replacement direction. */
1435 if (!deps_ok_for_redirect (bb1, bb2))
1436 continue;
1438 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1439 continue;
1441 find_duplicate (same_succ, bb1, bb2);
1446 /* Find clusters of bbs which can be merged. */
1448 static void
1449 find_clusters (void)
1451 same_succ same;
1453 while (!worklist.is_empty ())
1455 same = worklist.pop ();
1456 same->in_worklist = false;
1457 if (dump_file && (dump_flags & TDF_DETAILS))
1459 fprintf (dump_file, "processing worklist entry\n");
1460 same_succ_print (dump_file, same);
1462 find_clusters_1 (same);
1466 /* Returns the vop phi of BB, if any. */
1468 static gimple
1469 vop_phi (basic_block bb)
1471 gimple stmt;
1472 gimple_stmt_iterator gsi;
1473 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1475 stmt = gsi_stmt (gsi);
1476 if (! virtual_operand_p (gimple_phi_result (stmt)))
1477 continue;
1478 return stmt;
1480 return NULL;
1483 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1485 static void
1486 replace_block_by (basic_block bb1, basic_block bb2)
1488 edge pred_edge;
1489 edge e1, e2;
1490 edge_iterator ei;
1491 unsigned int i;
1492 gimple bb2_phi;
1494 bb2_phi = vop_phi (bb2);
1496 /* Mark the basic block as deleted. */
1497 mark_basic_block_deleted (bb1);
1499 /* Redirect the incoming edges of bb1 to bb2. */
1500 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1502 pred_edge = EDGE_PRED (bb1, i - 1);
1503 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1504 gcc_assert (pred_edge != NULL);
1506 if (bb2_phi == NULL)
1507 continue;
1509 /* The phi might have run out of capacity when the redirect added an
1510 argument, which means it could have been replaced. Refresh it. */
1511 bb2_phi = vop_phi (bb2);
1513 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1514 pred_edge, UNKNOWN_LOCATION);
1517 bb2->frequency += bb1->frequency;
1518 if (bb2->frequency > BB_FREQ_MAX)
1519 bb2->frequency = BB_FREQ_MAX;
1521 bb2->count += bb1->count;
1523 /* Merge the outgoing edge counts from bb1 onto bb2. */
1524 gcov_type out_sum = 0;
1525 FOR_EACH_EDGE (e1, ei, bb1->succs)
1527 e2 = find_edge (bb2, e1->dest);
1528 gcc_assert (e2);
1529 e2->count += e1->count;
1530 out_sum += e2->count;
1532 /* Recompute the edge probabilities from the new merged edge count.
1533 Use the sum of the new merged edge counts computed above instead
1534 of bb2's merged count, in case there are profile count insanities
1535 making the bb count inconsistent with the edge weights. */
1536 FOR_EACH_EDGE (e2, ei, bb2->succs)
1538 e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
1541 /* Do updates that use bb1, before deleting bb1. */
1542 release_last_vdef (bb1);
1543 same_succ_flush_bb (bb1);
1545 delete_basic_block (bb1);
1548 /* Bbs for which update_debug_stmt need to be called. */
1550 static bitmap update_bbs;
1552 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1553 number of bbs removed. */
1555 static int
1556 apply_clusters (void)
1558 basic_block bb1, bb2;
1559 bb_cluster c;
1560 unsigned int i, j;
1561 bitmap_iterator bj;
1562 int nr_bbs_removed = 0;
1564 for (i = 0; i < all_clusters.length (); ++i)
1566 c = all_clusters[i];
1567 if (c == NULL)
1568 continue;
1570 bb2 = c->rep_bb;
1571 bitmap_set_bit (update_bbs, bb2->index);
1573 bitmap_clear_bit (c->bbs, bb2->index);
1574 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1576 bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
1577 bitmap_clear_bit (update_bbs, bb1->index);
1579 replace_block_by (bb1, bb2);
1580 nr_bbs_removed++;
1584 return nr_bbs_removed;
1587 /* Resets debug statement STMT if it has uses that are not dominated by their
1588 defs. */
1590 static void
1591 update_debug_stmt (gimple stmt)
1593 use_operand_p use_p;
1594 ssa_op_iter oi;
1595 basic_block bbdef, bbuse;
1596 gimple def_stmt;
1597 tree name;
1599 if (!gimple_debug_bind_p (stmt))
1600 return;
1602 bbuse = gimple_bb (stmt);
1603 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1605 name = USE_FROM_PTR (use_p);
1606 gcc_assert (TREE_CODE (name) == SSA_NAME);
1608 def_stmt = SSA_NAME_DEF_STMT (name);
1609 gcc_assert (def_stmt != NULL);
1611 bbdef = gimple_bb (def_stmt);
1612 if (bbdef == NULL || bbuse == bbdef
1613 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1614 continue;
1616 gimple_debug_bind_reset_value (stmt);
1617 update_stmt (stmt);
1621 /* Resets all debug statements that have uses that are not
1622 dominated by their defs. */
1624 static void
1625 update_debug_stmts (void)
1627 basic_block bb;
1628 bitmap_iterator bi;
1629 unsigned int i;
1631 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1633 gimple stmt;
1634 gimple_stmt_iterator gsi;
1636 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1637 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1639 stmt = gsi_stmt (gsi);
1640 if (!is_gimple_debug (stmt))
1641 continue;
1642 update_debug_stmt (stmt);
1647 /* Runs tail merge optimization. */
1649 unsigned int
1650 tail_merge_optimize (unsigned int todo)
1652 int nr_bbs_removed_total = 0;
1653 int nr_bbs_removed;
1654 bool loop_entered = false;
1655 int iteration_nr = 0;
1656 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1658 if (!flag_tree_tail_merge
1659 || max_iterations == 0
1660 /* We try to be conservative with respect to loop structure, since:
1661 - the cases where tail-merging could both affect loop structure and be
1662 beneficial are rare,
1663 - it prevents us from having to fixup the loops using
1664 loops_state_set (LOOPS_NEED_FIXUP), and
1665 - keeping loop structure may allow us to simplify the pass.
1666 In order to be conservative, we need loop information. In rare cases
1667 (about 7 test-cases in the g++ testsuite) there is none (because
1668 loop_optimizer_finalize has been called before tail-merge, and
1669 PROP_loops is not set), so we bail out. */
1670 || current_loops == NULL)
1671 return 0;
1673 timevar_push (TV_TREE_TAIL_MERGE);
1675 if (!dom_info_available_p (CDI_DOMINATORS))
1677 /* PRE can leave us with unreachable blocks, remove them now. */
1678 delete_unreachable_blocks ();
1679 calculate_dominance_info (CDI_DOMINATORS);
1681 init_worklist ();
1683 while (!worklist.is_empty ())
1685 if (!loop_entered)
1687 loop_entered = true;
1688 alloc_cluster_vectors ();
1689 update_bbs = BITMAP_ALLOC (NULL);
1691 else
1692 reset_cluster_vectors ();
1694 iteration_nr++;
1695 if (dump_file && (dump_flags & TDF_DETAILS))
1696 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1698 find_clusters ();
1699 gcc_assert (worklist.is_empty ());
1700 if (all_clusters.is_empty ())
1701 break;
1703 nr_bbs_removed = apply_clusters ();
1704 nr_bbs_removed_total += nr_bbs_removed;
1705 if (nr_bbs_removed == 0)
1706 break;
1708 free_dominance_info (CDI_DOMINATORS);
1710 if (iteration_nr == max_iterations)
1711 break;
1713 calculate_dominance_info (CDI_DOMINATORS);
1714 update_worklist ();
1717 if (dump_file && (dump_flags & TDF_DETAILS))
1718 fprintf (dump_file, "htab collision / search: %f\n",
1719 same_succ_htab.collisions ());
1721 if (nr_bbs_removed_total > 0)
1723 if (MAY_HAVE_DEBUG_STMTS)
1725 calculate_dominance_info (CDI_DOMINATORS);
1726 update_debug_stmts ();
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1731 fprintf (dump_file, "Before TODOs.\n");
1732 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1735 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
1736 mark_virtual_operands_for_renaming (cfun);
1739 delete_worklist ();
1740 if (loop_entered)
1742 delete_cluster_vectors ();
1743 BITMAP_FREE (update_bbs);
1746 timevar_pop (TV_TREE_TAIL_MERGE);
1748 return todo;